From e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Jun 2013 17:25:38 -0700 Subject: bcache: FUA fixes Journal writes need to be marked FUA, not just REQ_FLUSH. And btree node writes have... weird ordering requirements. Signed-off-by: Kent Overstreet diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 09fb8a2..a6ad49a 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -328,10 +328,25 @@ static void do_btree_node_write(struct btree *b) b->bio->bi_end_io = btree_node_write_endio; b->bio->bi_private = &b->io.cl; - b->bio->bi_rw = REQ_META|WRITE_SYNC; - b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); + b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; + b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); bch_bio_map(b->bio, i); + /* + * If we're appending to a leaf node, we don't technically need FUA - + * this write just needs to be persisted before the next journal write, + * which will be marked FLUSH|FUA. + * + * Similarly if we're writing a new btree root - the pointer is going to + * be in the next journal entry. + * + * But if we're writing a new btree node (that isn't a root) or + * appending to a non leaf btree node, we need either FUA or a flush + * when we write the parent with the new pointer. FUA is cheaper than a + * flush, and writes appending to leaf nodes aren't blocking anything so + * just make all btree node writes FUA to keep things sane. + */ + bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); @@ -2092,6 +2107,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c) void bch_btree_set_root(struct btree *b) { unsigned i; + struct closure cl; + + closure_init_stack(&cl); trace_bcache_btree_set_root(b); @@ -2107,7 +2125,8 @@ void bch_btree_set_root(struct btree *b) b->c->root = b; __bkey_put(b->c, &b->key); - bch_journal_meta(b->c, NULL); + bch_journal_meta(b->c, &cl); + closure_sync(&cl); } /* Cache lookup */ diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 5ca2214..4b25066 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -620,7 +620,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_sector = PTR_OFFSET(k, i); bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; + bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; bio->bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 17bd597..bcdf1f7 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1035,8 +1035,19 @@ static void request_write(struct cached_dev *dc, struct search *s) closure_bio_submit(bio, cl, s->d); } else { - s->op.cache_bio = bio; bch_writeback_add(dc); + + if (s->op.flush_journal) { + /* Also need to send a flush to the backing device */ + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + bio->bi_size = 0; + bio->bi_vcnt = 0; + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + } } out: closure_call(&s->op.cl, bch_insert_data, NULL, cl); -- cgit v0.10.2