diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bcache/bcache.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 7 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 4 | ||||
-rw-r--r-- | drivers/md/bcache/util.c | 50 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 20 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.h | 21 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 5 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 26 | ||||
-rw-r--r-- | drivers/md/linear.c | 3 | ||||
-rw-r--r-- | drivers/md/md.c | 4 | ||||
-rw-r--r-- | drivers/md/raid1.c | 5 | ||||
-rw-r--r-- | drivers/md/raid10.c | 19 | ||||
-rw-r--r-- | drivers/md/raid5.c | 26 |
15 files changed, 143 insertions, 58 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index c3ea03c..02619ca 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -333,6 +333,7 @@ struct cached_dev { /* Limit number of writeback bios in flight */ struct semaphore in_flight; struct task_struct *writeback_thread; + struct workqueue_struct *writeback_write_wq; struct keybuf writeback_keys; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index a37c177..e0f1c6d 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio, *n; - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) - wake_up_gc(op->c); - if (op->bypass) return bch_data_invalidate(cl); + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) + wake_up_gc(op->c); + /* * Journal writes are marked REQ_PREFLUSH; if the original write was a * flush, it'll wait on the journal write. diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 66669c8..f4557f5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1025,7 +1025,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(dc); + bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); @@ -1058,6 +1058,8 @@ static void cached_dev_free(struct closure *cl) cancel_delayed_work_sync(&dc->writeback_rate_update); if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); + if (dc->writeback_write_wq) + destroy_workqueue(dc->writeback_write_wq); mutex_lock(&bch_register_lock); @@ -1229,6 +1231,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) goto err; bcache_device_attach(d, c, u - c->uuids); + bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); add_disk(d->disk); @@ -1967,6 +1970,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (!IS_ERR(bdev)) + bdput(bdev); if (attr == &ksysfs_register_quiet) goto out; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3ff57d..4fbb553 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -191,7 +191,7 @@ STORE(__cached_dev) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); - unsigned v = size; + ssize_t v = size; struct cache_set *c; struct kobj_uevent_env *env; @@ -226,7 +226,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); + v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index dde6172..eb70f68 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -73,24 +73,44 @@ STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) +/** + * bch_hprint() - formats @v to human readable string for sysfs. + * + * @v - signed 64 bit integer + * @buf - the (at least 8 byte) buffer to format the result into. + * + * Returns the number of bytes used by format. + */ ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; - char dec[4] = ""; - int u, t = 0; - - for (u = 0; v >= 1024 || v <= -1024; u++) { - t = v & ~(~0 << 10); - v >>= 10; - } - - if (!u) - return sprintf(buf, "%llu", v); - - if (v < 100 && v > -100) - snprintf(dec, sizeof(dec), ".%i", t / 100); - - return sprintf(buf, "%lli%s%c", v, dec, units[u]); + int u = 0, t; + + uint64_t q; + + if (v < 0) + q = -v; + else + q = v; + + /* For as long as the number is more than 3 digits, but at least + * once, shift right / divide by 1024. Keep the remainder for + * a digit after the decimal point. + */ + do { + u++; + + t = q & ~(~0 << 10); + q >>= 10; + } while (q >= 1000); + + if (v < 0) + /* '-', up to 3 digits, '.', 1 digit, 1 character, null; + * yields 8 bytes. + */ + return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]); + else + return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]); } ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index e51644e..4ce2b19 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -20,7 +20,8 @@ static void __update_writeback_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; - uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size; + uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - + bcache_flash_devs_sectors_dirty(c); uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); @@ -186,7 +187,7 @@ static void write_dirty(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty_finish, system_wq); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } static void read_dirty_endio(struct bio *bio) @@ -206,7 +207,7 @@ static void read_dirty_submit(struct closure *cl) closure_bio_submit(&io->bio, cl); - continue_at(cl, write_dirty, system_wq); + continue_at(cl, write_dirty, io->dc->writeback_write_wq); } static void read_dirty(struct cached_dev *dc) @@ -482,17 +483,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct cached_dev *dc) +void bch_sectors_dirty_init(struct bcache_device *d) { struct sectors_dirty_init op; bch_btree_op_init(&op.op, -1); - op.inode = dc->disk.id; + op.inode = d->id; - bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), + bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); - dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); + d->sectors_dirty_last = bcache_dev_sectors_dirty(d); } void bch_cached_dev_writeback_init(struct cached_dev *dc) @@ -516,6 +517,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { + dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", + WQ_MEM_RECLAIM, 0); + if (!dc->writeback_write_wq) + return -ENOMEM; + dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 301eaf5..cdf8d25 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } +static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) +{ + uint64_t i, ret = 0; + + mutex_lock(&bch_register_lock); + + for (i = 0; i < c->nr_uuids; i++) { + struct bcache_device *d = c->devices[i]; + + if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) + continue; + ret += bcache_dev_sectors_dirty(d); + } + + mutex_unlock(&bch_register_lock); + + return ret; +} + static inline unsigned offset_to_stripe(struct bcache_device *d, uint64_t offset) { @@ -85,7 +104,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); -void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_sectors_dirty_init(struct bcache_device *); void bch_cached_dev_writeback_init(struct cached_dev *); int bch_cached_dev_writeback_start(struct cached_dev *); diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 2d82692..fb02c39 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1992,6 +1992,11 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, long pages; struct bitmap_page *new_bp; + if (bitmap->storage.file && !init) { + pr_info("md: cannot resize file-based bitmap\n"); + return -EINVAL; + } + if (chunksize == 0) { /* If there is enough space, leave the chunk size unchanged, * else increase by factor of two until there is enough space. diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index ac8235b..0d437c9 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -431,7 +431,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) unsigned long flags; struct priority_group *pg; struct pgpath *pgpath; - bool bypassed = true; + unsigned bypassed = 1; if (!atomic_read(&m->nr_valid_paths)) { clear_bit(MPATHF_QUEUE_IO, &m->flags); @@ -470,7 +470,7 @@ check_current_pg: */ do { list_for_each_entry(pg, &m->priority_groups, list) { - if (pg->bypassed == bypassed) + if (pg->bypassed == !!bypassed) continue; pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) { diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index be869a9..0b678b5 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1095,6 +1095,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) return; } + /* + * Increment the unmapped blocks. This prevents a race between the + * passdown io and reallocation of freed blocks. + */ + r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end); + if (r) { + metadata_operation_failed(pool, "dm_pool_inc_data_range", r); + bio_io_error(m->bio); + cell_defer_no_holder(tc, m->cell); + mempool_free(m, pool->mapping_pool); + return; + } + discard_parent = bio_alloc(GFP_NOIO, 1); if (!discard_parent) { DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.", @@ -1115,19 +1128,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) end_discard(&op, r); } } - - /* - * Increment the unmapped blocks. This prevents a race between the - * passdown io and reallocation of freed blocks. - */ - r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end); - if (r) { - metadata_operation_failed(pool, "dm_pool_inc_data_range", r); - bio_io_error(m->bio); - cell_defer_no_holder(tc, m->cell); - mempool_free(m, pool->mapping_pool); - return; - } } static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b0c0aef..12abf69 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -223,7 +223,8 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) * oldconf until no one uses it anymore. */ mddev_suspend(mddev); - oldconf = rcu_dereference(mddev->private); + oldconf = rcu_dereference_protected(mddev->private, + lockdep_is_held(&mddev->reconfig_mutex)); mddev->raid_disks++; WARN_ONCE(mddev->raid_disks != newconf->raid_disks, "copied raid_disks doesn't match mddev->raid_disks"); diff --git a/drivers/md/md.c b/drivers/md/md.c index eddd360..8ebf1b9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1861,7 +1861,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); - sb->super_offset = rdev->sb_start; + sb->super_offset = cpu_to_le64(rdev->sb_start); sb->sb_csum = calc_sb_1_csum(sb); md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); @@ -2270,7 +2270,7 @@ static bool does_sb_need_changing(struct mddev *mddev) /* Check if any mddev parameters have changed */ if ((mddev->dev_sectors != le64_to_cpu(sb->size)) || (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) || - (mddev->layout != le64_to_cpu(sb->layout)) || + (mddev->layout != le32_to_cpu(sb->layout)) || (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) || (mddev->chunk_sectors != le32_to_cpu(sb->chunksize))) return true; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 29e2df5..81a7875 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1073,7 +1073,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) */ DEFINE_WAIT(w); for (;;) { - flush_signals(current); + sigset_t full, old; prepare_to_wait(&conf->wait_barrier, &w, TASK_INTERRUPTIBLE); if (bio_end_sector(bio) <= mddev->suspend_lo || @@ -1082,7 +1082,10 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) !md_cluster_ops->area_resyncing(mddev, WRITE, bio->bi_iter.bi_sector, bio_end_sector(bio)))) break; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); schedule(); + sigprocmask(SIG_SETMASK, &old, NULL); } finish_wait(&conf->wait_barrier, &w); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 4c4aab0..b19b551 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1407,11 +1407,24 @@ retry_write: mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); + + cb = blk_check_plugged(raid10_unplug, mddev, + sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid10_plug_cb, + cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f34ad2b..7aea022 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -829,6 +829,14 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh spin_unlock(&head->batch_head->batch_lock); goto unlock_out; } + /* + * We must assign batch_head of this stripe within the + * batch_lock, otherwise clear_batch_ready of batch head + * stripe could clear BATCH_READY bit of this stripe and + * this stripe->batch_head doesn't get assigned, which + * could confuse clear_batch_ready for this stripe + */ + sh->batch_head = head->batch_head; /* * at this point, head's BATCH_READY could be cleared, but we @@ -836,8 +844,6 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh */ list_add(&sh->batch_list, &head->batch_list); spin_unlock(&head->batch_head->batch_lock); - - sh->batch_head = head->batch_head; } else { head->batch_head = head; sh->batch_head = head->batch_head; @@ -4277,7 +4283,8 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | (1 << STRIPE_PREREAD_ACTIVE) | - (1 << STRIPE_DEGRADED)), + (1 << STRIPE_DEGRADED) | + (1 << STRIPE_ON_UNPLUG_LIST)), head_sh->state & (1 << STRIPE_INSYNC)); sh->check_state = head_sh->check_state; @@ -5300,12 +5307,15 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) * userspace, we want an interruptible * wait. */ - flush_signals(current); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_INTERRUPTIBLE); if (logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { + sigset_t full, old; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); schedule(); + sigprocmask(SIG_SETMASK, &old, NULL); do_prepare = true; } goto retry; @@ -5840,6 +5850,10 @@ static void raid5_do_work(struct work_struct *work) pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); + + r5l_flush_stripe_to_raid(conf->log); + + async_tx_issue_pending_all(); blk_finish_plug(&plug); pr_debug("--- raid5worker inactive\n"); @@ -7557,12 +7571,10 @@ static void end_reshape(struct r5conf *conf) { if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { - struct md_rdev *rdev; spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks; - rdev_for_each(rdev, conf->mddev) - rdev->data_offset = rdev->new_data_offset; + md_finish_reshape(conf->mddev); smp_wmb(); conf->reshape_progress = MaxSector; conf->mddev->reshape_position = MaxSector; |