From 5b1016e62f74c53e0330403025954c8d95384c03 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Mar 2014 17:49:37 -0700 Subject: bcache: Fix a bug when detaching After detaching a backing device from a cache set, a bit wasn't getting reset meaning the second detach wouldn't work correctly. Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 926ded8..1ea9fa2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -927,6 +927,7 @@ static void cached_dev_detach_finish(struct work_struct *w) list_move(&dc->list, &uncached_devices); clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); + clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); mutex_unlock(&bch_register_lock); @@ -1405,9 +1406,11 @@ static void cache_set_flush(struct closure *cl) if (ca->alloc_thread) kthread_stop(ca->alloc_thread); - cancel_delayed_work_sync(&c->journal.work); - /* flush last journal entry if needed */ - c->journal.work.work.func(&c->journal.work.work); + if (c->journal.cur) { + cancel_delayed_work_sync(&c->journal.work); + /* flush last journal entry if needed */ + c->journal.work.work.func(&c->journal.work.work); + } closure_return(cl); } -- cgit v0.10.2 From 9aa61a992acceeec0d1de2cd99938421498659d5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 10 Apr 2014 17:58:49 -0700 Subject: bcache: Fix a journal replay bug Journal replay wasn't validating pointers with bch_extent_invalid() before dereferencing them; fixed. Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 3a0de4c..243de0bf 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -474,9 +474,8 @@ out: return false; } -static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) +bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k) { - struct btree *b = container_of(bk, struct btree, keys); char buf[80]; if (!KEY_SIZE(k)) @@ -485,16 +484,22 @@ 
static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) if (KEY_SIZE(k) > KEY_OFFSET(k)) goto bad; - if (__ptr_invalid(b->c, k)) + if (__ptr_invalid(c, k)) goto bad; return false; bad: bch_extent_to_text(buf, sizeof(buf), k); - cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k)); + cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k)); return true; } +static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) +{ + struct btree *b = container_of(bk, struct btree, keys); + return __bch_extent_invalid(b->c, k); +} + static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, unsigned ptr) { diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h index e4e2340..e2ed540 100644 --- a/drivers/md/bcache/extents.h +++ b/drivers/md/bcache/extents.h @@ -9,5 +9,6 @@ struct cache_set; void bch_extent_to_text(char *, size_t, const struct bkey *); bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *); +bool __bch_extent_invalid(struct cache_set *, const struct bkey *); #endif /* _BCACHE_EXTENTS_H */ diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 59e8202..363b881 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -7,6 +7,7 @@ #include "bcache.h" #include "btree.h" #include "debug.h" +#include "extents.h" #include @@ -291,15 +292,16 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) for (k = i->j.start; k < bset_bkey_last(&i->j); - k = bkey_next(k)) { - unsigned j; + k = bkey_next(k)) + if (!__bch_extent_invalid(c, k)) { + unsigned j; - for (j = 0; j < KEY_PTRS(k); j++) - if (ptr_available(c, k, j)) - atomic_inc(&PTR_BUCKET(c, k, j)->pin); + for (j = 0; j < KEY_PTRS(k); j++) + if (ptr_available(c, k, j)) + atomic_inc(&PTR_BUCKET(c, k, j)->pin); - bch_initial_mark_key(c, 0, k); - } + bch_initial_mark_key(c, 0, k); + } } } -- cgit v0.10.2 From dbd810ab678d262d3772d29b65844d7b20dc47bc Mon Sep 17 
00:00:00 2001 From: Surbhi Palande Date: Thu, 10 Apr 2014 16:09:51 -0700 Subject: bcache: Fix to remove the rcu_sched stalls. while loop was executing infinitely. This fix ends the while loop gracefully. Signed-off-by: Surbhi Palande Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 363b881..ead001c 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -194,7 +194,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) continue; bsearch: /* Binary search */ - m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); + m = l; + r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); pr_debug("starting binary search, l %u r %u", l, r); while (l + 1 < r) { -- cgit v0.10.2 From 8b326d3a2a76912dfed2f0ab937d59fae9512ca2 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 21 Apr 2014 18:22:35 -0700 Subject: bcache allocator: send discards with correct size diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 443d03f..8eeab72 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -331,7 +331,7 @@ static int bch_allocator_thread(void *arg) mutex_unlock(&ca->set->bucket_lock); blkdev_issue_discard(ca->bdev, bucket_to_sector(ca->set, bucket), - ca->sb.block_size, GFP_KERNEL, 0); + ca->sb.bucket_size, GFP_KERNEL, 0); mutex_lock(&ca->set->bucket_lock); } -- cgit v0.10.2 From e5112201c1285841f8b565ece5d6ae7e0d7947a2 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 29 Apr 2014 15:39:27 -0700 Subject: bcache: fix lockdep warnings on shutdown diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1ea9fa2..09573c2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1214,7 +1214,9 @@ void bch_flash_dev_release(struct kobject *kobj) static void flash_dev_free(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); + mutex_lock(&bch_register_lock); 
bcache_device_free(d); + mutex_unlock(&bch_register_lock); kobject_put(&d->kobj); } @@ -1222,7 +1224,9 @@ static void flash_dev_flush(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); + mutex_lock(&bch_register_lock); bcache_device_unlink(d); + mutex_unlock(&bch_register_lock); kobject_del(&d->kobj); continue_at(cl, flash_dev_free, system_wq); } -- cgit v0.10.2 From a664d0f05a2ec02c8f042db536d84d15d6e19e81 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 20 May 2014 12:20:28 -0700 Subject: bcache: fix crash on shutdown in passthrough mode We never started the writeback thread in this case, so don't stop it. diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 09573c2..6ceaec3 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1071,7 +1071,8 @@ static void cached_dev_free(struct closure *cl) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); cancel_delayed_work_sync(&dc->writeback_rate_update); - kthread_stop(dc->writeback_thread); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) + kthread_stop(dc->writeback_thread); mutex_lock(&bch_register_lock); -- cgit v0.10.2 From c5aa4a3157b55bdca18dd2a9d9f43314470b6d32 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 21 Apr 2014 18:23:12 -0700 Subject: bcache: wait for buckets when allocating new btree root Tested: - sometimes bcache_tier test would hang on startup with a failure to allocate the btree root -- no longer seeing this Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7347b61..9dd9f1c 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1060,15 +1060,15 @@ static void btree_node_free(struct btree *b) mutex_unlock(&b->c->bucket_lock); } -struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, - int level) +struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + int level, bool wait) { 
BKEY_PADDED(key) k; struct btree *b = ERR_PTR(-EAGAIN); mutex_lock(&c->bucket_lock); retry: - if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL)) + if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait)) goto err; bkey_put(c, &k.key); @@ -1100,6 +1100,12 @@ err: return b; } +static struct btree *bch_btree_node_alloc(struct cache_set *c, + struct btree_op *op, int level) +{ + return __bch_btree_node_alloc(c, op, level, op != NULL); +} + static struct btree *btree_node_alloc_replacement(struct btree *b, struct btree_op *op) { diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 91dfa5e..0044182 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -242,7 +242,8 @@ void __bch_btree_node_write(struct btree *, struct closure *); void bch_btree_node_write(struct btree *, struct closure *); void bch_btree_set_root(struct btree *); -struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int); +struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *, + int, bool); struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *, struct bkey *, int, bool); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6ceaec3..3b043a0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1669,7 +1669,7 @@ static void run_cache_set(struct cache_set *c) goto err; err = "cannot allocate new btree root"; - c->root = bch_btree_node_alloc(c, NULL, 0); + c->root = __bch_btree_node_alloc(c, NULL, 0, true); if (IS_ERR_OR_NULL(c->root)) goto err; -- cgit v0.10.2 From 9e5c353510b26500bd6b8309823ac9ef2837b761 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 1 May 2014 13:48:57 -0700 Subject: bcache: fix uninterruptible sleep in writeback thread There were two issues here: - writeback thread did not start until the device first became dirty - writeback thread used uninterruptible sleep once running Without this patch I see kernel warnings 
printed and a load average of 1.52 after booting my test VM. With this patch the warnings are gone and the load average is near 0.00 as expected. Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3b043a0..00cc425 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1042,6 +1042,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) */ atomic_set(&dc->count, 1); + if (bch_cached_dev_writeback_start(dc)) + return -ENOMEM; + if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { bch_sectors_dirty_init(dc); atomic_set(&dc->has_dirty, 1); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f4300e4..f1986bc 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -239,7 +239,7 @@ static void read_dirty(struct cached_dev *dc) if (KEY_START(&w->key) != dc->last_read || jiffies_to_msecs(delay) > 50) while (!kthread_should_stop() && delay) - delay = schedule_timeout_uninterruptible(delay); + delay = schedule_timeout_interruptible(delay); dc->last_read = KEY_OFFSET(&w->key); @@ -436,7 +436,7 @@ static int bch_writeback_thread(void *arg) while (delay && !kthread_should_stop() && !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) - delay = schedule_timeout_uninterruptible(delay); + delay = schedule_timeout_interruptible(delay); } } @@ -478,7 +478,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc) dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); } -int bch_cached_dev_writeback_init(struct cached_dev *dc) +void bch_cached_dev_writeback_init(struct cached_dev *dc) { sema_init(&dc->in_flight, 64); init_rwsem(&dc->writeback_lock); @@ -494,14 +494,20 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_rate_d_term = 30; dc->writeback_rate_p_term_inverse = 6000; + INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); +} + +int bch_cached_dev_writeback_start(struct cached_dev *dc) +{ 
dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) return PTR_ERR(dc->writeback_thread); - INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); + bch_writeback_queue(dc); + return 0; } diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index e2f8598..0a9dab1 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -85,6 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); void bch_sectors_dirty_init(struct cached_dev *dc); -int bch_cached_dev_writeback_init(struct cached_dev *); +void bch_cached_dev_writeback_init(struct cached_dev *); +int bch_cached_dev_writeback_start(struct cached_dev *); #endif -- cgit v0.10.2 From bcf090e0040e30f8409e6a535a01e6473afb096f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 May 2014 08:57:55 -0700 Subject: bcache: Make sure to pass GFP_WAIT to mempool_alloc() this was very wrong - mempool_alloc() only guarantees success with GFP_WAIT. bcache uses GFP_NOWAIT in various other places where we have a fallback, circuits must've gotten crossed when writing this code or something. 
Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 9dd9f1c..e538d45 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -202,7 +202,7 @@ void bch_btree_node_read_done(struct btree *b) struct bset *i = btree_bset_first(b); struct btree_iter *iter; - iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); + iter = mempool_alloc(b->c->fill_iter, GFP_NOIO); iter->size = b->c->sb.bucket_size / b->c->sb.block_size; iter->used = 0; -- cgit v0.10.2 From 501d52a90cbe652b41336c206ff0e95799d5a9b5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 May 2014 08:55:40 -0700 Subject: bcache: Allocate bounce buffers with GFP_NOWAIT There's no point in blocking on these allocations, since our fallback paths will probably go faster than blocking. Change-Id: I733ca202c25cb36bde02607a0a60552229a4241c diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 5454164..646fe85 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -1182,7 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, { uint64_t start_time; bool used_mempool = false; - struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, + struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order); if (!out) { struct page *outp; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e538d45..39c7f5b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -421,7 +421,7 @@ static void do_btree_node_write(struct btree *b) SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_sector_offset(&b->keys, i)); - if (!bio_alloc_pages(b->bio, GFP_NOIO)) { + if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) { int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); -- cgit v0.10.2 From 8e0948080670f6330229718b15a6a1a011d441ce Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 30 Jun 2014 22:31:20 
-0700 Subject: bcache: fix typo in bch_bkey_equal_header Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 5f6728d..ae96462 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -453,7 +453,7 @@ static inline bool bch_bkey_equal_header(const struct bkey *l, { return (KEY_DIRTY(l) == KEY_DIRTY(r) && KEY_PTRS(l) == KEY_PTRS(r) && - KEY_CSUM(l) == KEY_CSUM(l)); + KEY_CSUM(l) == KEY_CSUM(r)); } /* Keylists */ -- cgit v0.10.2 From 60ae81eee86dd7a520db8c1e3d702b49fc0418b5 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 22 May 2014 12:14:24 -0700 Subject: bcache: bcache_write tracepoint was crashing Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 15fff4f..62e6e98 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -311,7 +311,8 @@ void bch_data_insert(struct closure *cl) { struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); - trace_bcache_write(op->bio, op->writeback, op->bypass); + trace_bcache_write(op->c, op->inode, op->bio, + op->writeback, op->bypass); bch_keylist_init(&op->insert_keys); bio_get(op->bio); diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index c9c3c04..6778e41 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -148,11 +148,13 @@ TRACE_EVENT(bcache_read, ); TRACE_EVENT(bcache_write, - TP_PROTO(struct bio *bio, bool writeback, bool bypass), - TP_ARGS(bio, writeback, bypass), + TP_PROTO(struct cache_set *c, u64 inode, struct bio *bio, + bool writeback, bool bypass), + TP_ARGS(c, inode, bio, writeback, bypass), TP_STRUCT__entry( - __field(dev_t, dev ) + __array(char, uuid, 16 ) + __field(u64, inode ) __field(sector_t, sector ) __field(unsigned int, nr_sector ) __array(char, rwbs, 6 ) @@ -161,7 +163,8 @@ TRACE_EVENT(bcache_write, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + memcpy(__entry->uuid, 
c->sb.set_uuid, 16); + __entry->inode = inode; __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); @@ -169,8 +172,8 @@ TRACE_EVENT(bcache_write, __entry->bypass = bypass; ), - TP_printk("%d,%d %s %llu + %u hit %u bypass %u", - MAJOR(__entry->dev), MINOR(__entry->dev), + TP_printk("%pU inode %llu %s %llu + %u hit %u bypass %u", + __entry->uuid, __entry->inode, __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->writeback, __entry->bypass) ); -- cgit v0.10.2 From 913dc33fb2720fb5f979011664294137ddd8b13b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 23 May 2014 11:18:35 -0700 Subject: bcache: fix crash in bcache_btree_node_alloc_fail tracepoint 'b' was NULL. Change-Id: Icac0fd04afa2d23f213d96d51afd53374e6dd0c0 diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 39c7f5b..f823785 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1096,7 +1096,7 @@ err_free: err: mutex_unlock(&c->bucket_lock); - trace_bcache_btree_node_alloc_fail(b); + trace_bcache_btree_node_alloc_fail(c); return b; } diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 6778e41..981acf7 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -261,9 +261,9 @@ DEFINE_EVENT(btree_node, bcache_btree_node_alloc, TP_ARGS(b) ); -DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail, - TP_PROTO(struct btree *b), - TP_ARGS(b) +DEFINE_EVENT(cache_set, bcache_btree_node_alloc_fail, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) ); DEFINE_EVENT(btree_node, bcache_btree_node_free, -- cgit v0.10.2 From 6b708de64adb6dc8319e7aeac922b46904fbeeec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 2 Jun 2014 15:39:44 -0700 Subject: bcache: Fix an infinite loop in journal replay When running with multiple cache devices, if one of the devices has a completely empty journal but we'd 
already found some journal entries on a previous device we'd go into an infinite loop. Change-Id: I1dcdc0d738192746de28f40e8b08825b0dea5e2b Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index ead001c..fe080ad 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -190,9 +190,12 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) if (read_bucket(l)) goto bsearch; - if (list_empty(list)) + /* no journal entries on this device? */ + if (l == ca->sb.njournal_buckets) continue; bsearch: + BUG_ON(list_empty(list)); + /* Binary search */ m = l; r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); -- cgit v0.10.2 From 400ffaa2acd72274e2c7293a9724382383bebf3e Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sat, 12 Jul 2014 21:53:11 -0700 Subject: bcache: fix use-after-free in btree_gc_coalesce() If we goto out_nocoalesce after we free new_nodes[0], we end up freeing new_nodes[0] again. This was generating a lockdep warning. The fix is to set new_nodes[0] to NULL, since the out_nocoalesce path safely ignores NULL entries in the new_nodes array. This regression was introduced in 2d7f9531. 
Change-Id: I76564d7257800583214376b4bacf236cda90c89c diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index f823785..776583f 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1409,6 +1409,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, BUG_ON(btree_bset_first(new_nodes[0])->keys); btree_node_free(new_nodes[0]); rw_unlock(true, new_nodes[0]); + new_nodes[0] = NULL; for (i = 0; i < nodes; i++) { if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key))) -- cgit v0.10.2 From d83353b319d47ef8cce82467da6a25c2d558253f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Jun 2014 19:44:49 -0700 Subject: bcache: Fix more early shutdown bugs Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 00cc425..29dd1e8 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -733,8 +733,6 @@ static void bcache_device_detach(struct bcache_device *d) static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, unsigned id) { - BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags)); - d->id = id; d->c = c; c->devices[id] = d; @@ -1771,6 +1769,7 @@ found: pr_debug("set version = %llu", c->sb.version); } + kobject_get(&ca->kobj); ca->set = c; ca->set->cache[ca->sb.nr_this_dev] = ca; c->cache_by_alloc[c->caches_loaded++] = ca; @@ -1888,10 +1887,12 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page, goto err; pr_info("registered cache device %s", bdevname(bdev, name)); +out: + kobject_put(&ca->kobj); return; err: pr_notice("error opening %s: %s", bdevname(bdev, name), err); - kobject_put(&ca->kobj); + goto out; } /* Global interfaces/init */ -- cgit v0.10.2 From bf0c55c986540483c34ca640f2eef4c3314388b1 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 11 Jul 2014 12:17:41 -0700 Subject: bcache: fix crash with incomplete cache set Change-Id: I6abde52afe917633480caaf4e2518f42a816d886 diff --git 
a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d2ebcf3..04f7bc2 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -477,9 +477,13 @@ struct gc_stat { * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). + * + * CACHE_SET_RUNNING means all cache devices have been registered and journal + * replay is complete. */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 +#define CACHE_SET_RUNNING 2 struct cache_set { struct closure cl; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 29dd1e8..72fbaf7 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1284,6 +1284,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) if (test_bit(CACHE_SET_STOPPING, &c->flags)) return -EINTR; + if (!test_bit(CACHE_SET_RUNNING, &c->flags)) + return -EPERM; + u = uuid_find_empty(c); if (!u) { pr_err("Can't create volume, no room for UUID"); @@ -1706,6 +1709,7 @@ static void run_cache_set(struct cache_set *c) flash_devs_run(c); + set_bit(CACHE_SET_RUNNING, &c->flags); return; err: closure_sync(&cl); -- cgit v0.10.2 From c9a78332b42cbdcdd386a95192a716b67d1711a4 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 19 Jun 2014 15:05:59 -0700 Subject: bcache: fix memory corruption in init error path If register_cache_set() failed, we would touch ca->set after it had already been freed. Also, fix an assertion to catch this. 
Change-Id: I748e5f5b223e2d9b2602075dec2f997cced2394d diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 72fbaf7..12ad381 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1356,8 +1356,11 @@ static void cache_set_free(struct closure *cl) bch_journal_free(c); for_each_cache(ca, c, i) - if (ca) + if (ca) { + ca->set = NULL; + c->cache[ca->sb.nr_this_dev] = NULL; kobject_put(&ca->kobj); + } bch_bset_sort_state_free(&c->sort); free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); @@ -1794,8 +1797,10 @@ void bch_cache_release(struct kobject *kobj) struct cache *ca = container_of(kobj, struct cache, kobj); unsigned i; - if (ca->set) + if (ca->set) { + BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca); ca->set->cache[ca->sb.nr_this_dev] = NULL; + } bio_split_pool_free(&ca->bio_split_hook); @@ -1858,7 +1863,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) } static void register_cache(struct cache_sb *sb, struct page *sb_page, - struct block_device *bdev, struct cache *ca) + struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; const char *err = "cannot allocate memory"; -- cgit v0.10.2 From 2452cc89063a2a6890368f185c4b6d7d8802179e Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sat, 12 Jul 2014 00:22:53 -0700 Subject: bcache: try to set b->parent properly bcache_flash_dev.ktest would reliably crash with 8k and 16k bucket size before; now it passes. 
Change-Id: Ib542232235e39298c3a7548fe52b645cabb823d1 diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 776583f..00cde40 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -117,9 +117,9 @@ ({ \ int _r, l = (b)->level - 1; \ bool _w = l <= (op)->lock; \ - struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\ + struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \ + _w, b); \ if (!IS_ERR(_child)) { \ - _child->parent = (b); \ _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \ rw_unlock(_w, _child); \ } else \ @@ -142,7 +142,6 @@ rw_lock(_w, _b, _b->level); \ if (_b == (c)->root && \ _w == insert_lock(op, _b)) { \ - _b->parent = NULL; \ _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ } \ rw_unlock(_w, _b); \ @@ -967,7 +966,8 @@ err: * level and op->lock. */ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, - struct bkey *k, int level, bool write) + struct bkey *k, int level, bool write, + struct btree *parent) { int i = 0; struct btree *b; @@ -1002,6 +1002,7 @@ retry: BUG_ON(b->level != level); } + b->parent = parent; b->accessed = 1; for (; i <= b->keys.nsets && b->keys.set[i].size; i++) { @@ -1022,15 +1023,16 @@ retry: return b; } -static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level) +static void btree_node_prefetch(struct btree *parent, struct bkey *k) { struct btree *b; - mutex_lock(&c->bucket_lock); - b = mca_alloc(c, NULL, k, level); - mutex_unlock(&c->bucket_lock); + mutex_lock(&parent->c->bucket_lock); + b = mca_alloc(parent->c, NULL, k, parent->level - 1); + mutex_unlock(&parent->c->bucket_lock); if (!IS_ERR_OR_NULL(b)) { + b->parent = parent; bch_btree_node_read(b); rw_unlock(true, b); } @@ -1061,7 +1063,8 @@ static void btree_node_free(struct btree *b) } struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, - int level, bool wait) + int level, bool wait, + struct btree *parent) { BKEY_PADDED(key) k; struct btree 
*b = ERR_PTR(-EAGAIN); @@ -1085,6 +1088,7 @@ retry: } b->accessed = 1; + b->parent = parent; bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb)); mutex_unlock(&c->bucket_lock); @@ -1101,15 +1105,16 @@ err: } static struct btree *bch_btree_node_alloc(struct cache_set *c, - struct btree_op *op, int level) + struct btree_op *op, int level, + struct btree *parent) { - return __bch_btree_node_alloc(c, op, level, op != NULL); + return __bch_btree_node_alloc(c, op, level, op != NULL, parent); } static struct btree *btree_node_alloc_replacement(struct btree *b, struct btree_op *op) { - struct btree *n = bch_btree_node_alloc(b->c, op, b->level); + struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); if (!IS_ERR_OR_NULL(n)) { mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); @@ -1523,7 +1528,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) { r->b = bch_btree_node_get(b->c, op, k, b->level - 1, - true); + true, b); if (IS_ERR(r->b)) { ret = PTR_ERR(r->b); break; @@ -1818,7 +1823,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) - btree_node_prefetch(b->c, k, b->level - 1); + btree_node_prefetch(b, k); if (p) ret = btree(check_recurse, p, b, op); @@ -1983,12 +1988,12 @@ static int btree_split(struct btree *b, struct btree_op *op, trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys); - n2 = bch_btree_node_alloc(b->c, op, b->level); + n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent); if (IS_ERR(n2)) goto err_free1; if (!b->parent) { - n3 = bch_btree_node_alloc(b->c, op, b->level + 1); + n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL); if (IS_ERR(n3)) goto err_free2; } diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 0044182..5c391fa 100644 --- a/drivers/md/bcache/btree.h +++ 
b/drivers/md/bcache/btree.h @@ -243,9 +243,9 @@ void bch_btree_node_write(struct btree *, struct closure *); void bch_btree_set_root(struct btree *); struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *, - int, bool); + int, bool, struct btree *); struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *, - struct bkey *, int, bool); + struct bkey *, int, bool, struct btree *); int bch_btree_insert_check_key(struct btree *, struct btree_op *, struct bkey *); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 12ad381..b6114d6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1601,7 +1601,7 @@ static void run_cache_set(struct cache_set *c) goto err; err = "error reading btree root"; - c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true); + c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL); if (IS_ERR_OR_NULL(c->root)) goto err; @@ -1676,7 +1676,7 @@ static void run_cache_set(struct cache_set *c) goto err; err = "cannot allocate new btree root"; - c->root = __bch_btree_node_alloc(c, NULL, 0, true); + c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); if (IS_ERR_OR_NULL(c->root)) goto err; -- cgit v0.10.2 From 5b25abade29616d42d60f9bd5e6a5ad07f7314e3 Mon Sep 17 00:00:00 2001 From: Surbhi Palande Date: Thu, 17 Apr 2014 12:07:04 -0700 Subject: bcache: Correct printing of btree_gc_max_duration_ms time_stats::btree_gc_max_duration_ms is not bit shifted by 8 Fixes BUG #138 Change-Id: I44fc6e1d0579674016acc533f1a546b080e5371a Signed-off-by: Surbhi Palande diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index ac7d0d1..98df757 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -416,8 +416,8 @@ do { \ average_frequency, frequency_units); \ __print_time_stat(stats, name, \ average_duration, duration_units); \ - __print_time_stat(stats, name, \ - max_duration, duration_units); \ + sysfs_print(name ## _ ##max_duration ## _ ## 
duration_units, \ + div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\ \ sysfs_print(name ## _last_ ## frequency_units, (stats)->last \ ? div_s64(local_clock() - (stats)->last, \ -- cgit v0.10.2 From 789d21dbd9d8889e62c79ec19585fcc97e42ef07 Mon Sep 17 00:00:00 2001 From: Jianjian Huo Date: Sun, 13 Jul 2014 09:08:59 -0700 Subject: bcache: add mutex lock for bch_is_open Since bch_is_open will iterate linked list bch_cache_sets and uncached_devices, it needs bch_register_lock. Signed-off-by: Jianjian Huo diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index b6114d6..60e7513 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1966,10 +1966,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (IS_ERR(bdev)) { if (bdev == ERR_PTR(-EBUSY)) { bdev = lookup_bdev(strim(path)); + mutex_lock(&bch_register_lock); if (!IS_ERR(bdev) && bch_is_open(bdev)) err = "device already registered"; else err = "device busy"; + mutex_unlock(&bch_register_lock); } goto err; } -- cgit v0.10.2 From 0781c8748cf1ea2b0dcd966571103909528c4efa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 7 Jul 2014 13:03:36 -0700 Subject: bcache: Drop unneeded blk_sync_queue() calls this is needed for the queue/block device we created (it's done by blk_cleanup_queue() which we do call) - but calling it for the block devices we only opened is pointless. 
Change-Id: I53dfded14ed15b9581d10ca8399d5e1b3abbf9f2 diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 60e7513..d4713d0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1084,12 +1084,8 @@ static void cached_dev_free(struct closure *cl) mutex_unlock(&bch_register_lock); - if (!IS_ERR_OR_NULL(dc->bdev)) { - if (dc->bdev->bd_disk) - blk_sync_queue(bdev_get_queue(dc->bdev)); - + if (!IS_ERR_OR_NULL(dc->bdev)) blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); - } wake_up(&unregister_wait); @@ -1817,10 +1813,8 @@ void bch_cache_release(struct kobject *kobj) if (ca->sb_bio.bi_inline_vecs[0].bv_page) put_page(ca->sb_bio.bi_io_vec[0].bv_page); - if (!IS_ERR_OR_NULL(ca->bdev)) { - blk_sync_queue(bdev_get_queue(ca->bdev)); + if (!IS_ERR_OR_NULL(ca->bdev)) blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); - } kfree(ca); module_put(THIS_MODULE); -- cgit v0.10.2