From 8182924bc5850281985d73c312876746acd390b5 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 30 Mar 2011 09:51:33 +0200
Subject: block: dump request state on seeing a corrupted request completion

Currently we just dump a non-informative 'request botched' message.
Let's actually try to print something sane to help debug issues
around this.

Signed-off-by: Jens Axboe

diff --git a/block/blk-core.c b/block/blk-core.c
index e0a0623..4fdf895 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2163,7 +2163,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	 * size, something has gone terribly wrong.
 	 */
	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
-		printk(KERN_ERR "blk: request botched\n");
+		blk_dump_rq_flags(req, "request botched");
		req->__data_len = blk_rq_cur_bytes(req);
	}
--
cgit v0.10.2
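For reference, the guarded completion path reads as follows after the patch; this is a condensed excerpt of the hunk above, with explanatory comments added:

	/*
	 * Completion consumed more bytes than the request has left:
	 * dump the request's identifying state instead of a bare
	 * printk, then clamp the length and continue.
	 */
	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
		blk_dump_rq_flags(req, "request botched");
		req->__data_len = blk_rq_cur_bytes(req);
	}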
From b710a480554f2be682bac3cb59b0e085ba3d644b Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 30 Mar 2011 09:52:30 +0200
Subject: block: get rid of elv_insert() interface

Merge it with __elv_add_request(); it's pretty pointless to have
a function with only two callers. The main interface is
elv_add_request()/__elv_add_request().

Signed-off-by: Jens Axboe

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 93d5fd8..d6275cb 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -261,7 +261,7 @@ static bool blk_kick_flush(struct request_queue *q)
	q->flush_rq.end_io = flush_end_io;

	q->flush_pending_idx ^= 1;
-	elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_REQUEUE);
+	__elv_add_request(q, &q->flush_rq, ELEVATOR_INSERT_REQUEUE);
	return true;
 }

@@ -281,7 +281,7 @@ static void flush_data_end_io(struct request *rq, int error)
  * blk_insert_flush - insert a new FLUSH/FUA request
  * @rq: request to insert
  *
- * To be called from elv_insert() for %ELEVATOR_INSERT_FLUSH insertions.
+ * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.
  * @rq is being submitted.  Analyze what needs to be done and put it on the
  * right queue.
  *
diff --git a/block/elevator.c b/block/elevator.c
index c387d31..0cdb4e7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -610,7 +610,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)

	rq->cmd_flags &= ~REQ_STARTED;

-	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
 }

 void elv_drain_elevator(struct request_queue *q)
@@ -655,12 +655,25 @@ void elv_quiesce_end(struct request_queue *q)
	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
 }

-void elv_insert(struct request_queue *q, struct request *rq, int where)
+void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
	trace_block_rq_insert(q, rq);

	rq->q = q;

+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
+	if (rq->cmd_flags & REQ_SOFTBARRIER) {
+		/* barriers are scheduling boundary, update end_sector */
+		if (rq->cmd_type == REQ_TYPE_FS ||
+		    (rq->cmd_flags & REQ_DISCARD)) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = rq;
+		}
+	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
+		    where == ELEVATOR_INSERT_SORT)
+		where = ELEVATOR_INSERT_BACK;
+
	switch (where) {
	case ELEVATOR_INSERT_REQUEUE:
	case ELEVATOR_INSERT_FRONT:
@@ -722,24 +735,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
		BUG();
	}
 }
-
-void __elv_add_request(struct request_queue *q, struct request *rq, int where)
-{
-	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
-
-	if (rq->cmd_flags & REQ_SOFTBARRIER) {
-		/* barriers are scheduling boundary, update end_sector */
-		if (rq->cmd_type == REQ_TYPE_FS ||
-		    (rq->cmd_flags & REQ_DISCARD)) {
-			q->end_sector = rq_end_sector(rq);
-			q->boundary_rq = rq;
-		}
-	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
-		    where == ELEVATOR_INSERT_SORT)
-		where = ELEVATOR_INSERT_BACK;
-
-	elv_insert(q, rq, where);
-}
 EXPORT_SYMBOL(__elv_add_request);

 void elv_add_request(struct request_queue *q, struct request *rq, int where)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index d93efcc445..21a8ebf 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -101,7 +101,6 @@ extern void elv_dispatch_sort(struct request_queue *, struct request *);
 extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
 extern void elv_add_request(struct request_queue *, struct request *, int);
 extern void __elv_add_request(struct request_queue *, struct request *, int);
-extern void elv_insert(struct request_queue *, struct request *, int);
 extern int elv_merge(struct request_queue *, struct request **, struct bio *);
 extern int elv_try_merge(struct request *, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
--
cgit v0.10.2
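Condensed, the logic the merge folds into __elv_add_request() is the following; this is an annotated sketch of the hunk above, not a complete function, and the comments are added here:

	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);	/* must be off the plug list */

	if (rq->cmd_flags & REQ_SOFTBARRIER) {
		/* a barrier is a scheduling boundary: remember where it
		 * ends so later requests are not sorted across it */
		if (rq->cmd_type == REQ_TYPE_FS ||
		    (rq->cmd_flags & REQ_DISCARD)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
		    where == ELEVATOR_INSERT_SORT)
		/* no elevator-private data, so the request cannot enter
		 * the io scheduler's sort lists: insert at the back */
		where = ELEVATOR_INSERT_BACK;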
From 53d63e6b0dfb95882ec0219ba6bbd50cde423794 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 30 Mar 2011 13:27:09 +0200
Subject: block: make the flush insertion use the tail of the dispatch list

It's not a preempt-type request; in fact, we have to insert it
behind requests that do specify INSERT_FRONT.

Signed-off-by: Jens Axboe

diff --git a/block/blk-flush.c b/block/blk-flush.c
index d6275cb..eba4a27 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -261,7 +261,7 @@ static bool blk_kick_flush(struct request_queue *q)
	q->flush_rq.end_io = flush_end_io;

	q->flush_pending_idx ^= 1;
-	__elv_add_request(q, &q->flush_rq, ELEVATOR_INSERT_REQUEUE);
+	list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
	return true;
 }

@@ -312,7 +312,7 @@ void blk_insert_flush(struct request *rq)
	 */
	if ((policy & REQ_FSEQ_DATA) &&
	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-		list_add(&rq->queuelist, &q->queue_head);
+		list_add_tail(&rq->queuelist, &q->queue_head);
		return;
	}
--
cgit v0.10.2

From ee3dea3549444e6e76d27af48b4929983e6f023c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 30 Mar 2011 12:17:43 +0200
Subject: ufs: remove unnecessary blk_flush_plug

We already flush the per-process plugging list when context switching,
so a blk_flush_plug call just before a yield() is not needed.

Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe

diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 1101430..5f821db 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -479,7 +479,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
			break;
		if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
			ufs_sync_inode (inode);
-		blk_flush_plug(current);
		yield();
	}
--
cgit v0.10.2

From 6f0379377047b18103b88ce33c03e5b19747ae57 Mon Sep 17 00:00:00 2001
From: Andreas Schwab
Date: Wed, 30 Mar 2011 12:21:56 +0200
Subject: blk-throttle: don't call xchg on bool

xchg does not work portably with types smaller than 32 bits.

Signed-off-by: Andreas Schwab
Signed-off-by: Jens Axboe

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5352bda..6c98cfe 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -77,7 +77,7 @@ struct throtl_grp {
	unsigned long slice_end[2];

	/* Some throttle limits got updated for the group */
-	bool limits_changed;
+	int limits_changed;
 };

 struct throtl_data
@@ -102,7 +102,7 @@ struct throtl_data
	/* Work for dispatching throttled bios */
	struct delayed_work throtl_work;

-	bool limits_changed;
+	int limits_changed;
 };

 enum tg_state_flags {
--
cgit v0.10.2
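The portability issue behind the blk-throttle change, in brief: the kernel's xchg() is only guaranteed for 32- and 64-bit operands on every architecture, while bool is a single byte on most ABIs. A minimal sketch of the read-and-clear pattern the field is used for; the helper name here is illustrative, not from the patch:

	/*
	 * Atomically fetch the "limits changed" flag and reset it.
	 * xchg() stores the new value and returns the old one, but with
	 * a bool field the operand is 1 byte, which not every
	 * architecture's xchg() supports -- hence the switch to int.
	 */
	static int test_and_clear_limits_changed(struct throtl_data *td)
	{
		return xchg(&td->limits_changed, 0);
	}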
From 929e27252e8ca69363f81f26af5eaba62cb4c572 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 30 Mar 2011 12:22:23 +0200
Subject: ide: ide_requeue_and_plug() reinstate "always plug" behaviour

We see stalls if we don't always ensure that the queue gets run
again. Even if rq == NULL, we could have other pending requests
in the queue.

Signed-off-by: Jens Axboe

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 0e406d73..ca27d30 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -570,8 +570,7 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
	spin_unlock_irqrestore(q->queue_lock, flags);

	/* Use 3ms as that was the old plug delay */
-	if (rq)
-		blk_delay_queue(q, 3);
+	blk_delay_queue(q, 3);
 }

 static int drive_is_ready(ide_drive_t *drive)
--
cgit v0.10.2

From 7dcda1c96d7c643101d4a05579ef4512a4baa7ef Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 5 Apr 2011 23:51:48 +0200
Subject: fs: export empty_aops

With the ->sync_page() hook gone, we have a few users that add their
own static address_space_operations without any functions defined.

fs/inode.c already has an empty_aops that it uses for init purposes.
Let's export that and use it in the places where an otherwise empty
aops was defined.

Signed-off-by: Jens Axboe

diff --git a/fs/inode.c b/fs/inode.c
index 5f4e11a..33c963d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -125,6 +125,14 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
 static DECLARE_RWSEM(iprune_sem);

 /*
+ * Empty aops. Can be used for the cases where the user does not
+ * define any of the address_space operations.
+ */
+const struct address_space_operations empty_aops = {
+};
+EXPORT_SYMBOL(empty_aops);
+
+/*
  * Statistics gathering..
  */
 struct inodes_stat_t inodes_stat;
@@ -176,7 +184,6 @@ int proc_nr_inodes(ctl_table *table, int write,
  */
 int inode_init_always(struct super_block *sb, struct inode *inode)
 {
-	static const struct address_space_operations empty_aops;
	static const struct inode_operations empty_iops;
	static const struct file_operations empty_fops;
	struct address_space *const mapping = &inode->i_data;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 9d2dc6b..1168059 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -495,8 +495,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
 void nilfs_mapping_init(struct address_space *mapping,
			struct backing_dev_info *bdi)
 {
-	static const struct address_space_operations empty_aops;
-
	mapping->host = NULL;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c74400f..3299f46 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -56,6 +56,7 @@
  */

 #include "ubifs.h"
+#include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
@@ -80,7 +81,6 @@ enum {
 };

 static const struct inode_operations none_inode_operations;
-static const struct address_space_operations none_address_operations;
 static const struct file_operations none_file_operations;

 /**
@@ -130,7 +130,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
	}

	/* Re-define all operations to be "nothing" */
-	inode->i_mapping->a_ops = &none_address_operations;
+	inode->i_mapping->a_ops = &empty_aops;
	inode->i_op = &none_inode_operations;
	inode->i_fop = &none_file_operations;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 52f283c..1b95af3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -613,6 +613,8 @@ struct address_space_operations {
	int (*error_remove_page)(struct address_space *, struct page *);
 };

+extern const struct address_space_operations empty_aops;
+
 /*
  * pagecache_write_begin/pagecache_write_end must be used by general code
  * to write into the pagecache.
--
cgit v0.10.2
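Typical use after this change, mirroring what the ubifs hunk does: a filesystem that needs a mapping with no page-cache operations points it at the shared exported object instead of declaring its own empty static. The helper name below is illustrative, not from the patch:

	#include <linux/fs.h>

	/*
	 * Illustrative helper: set up an internal inode whose mapping
	 * must never perform real page-cache I/O.  empty_aops is the
	 * shared, exported aops from fs/inode.c.
	 */
	static void example_init_internal_inode(struct inode *inode)
	{
		inode->i_mapping->a_ops = &empty_aops;
	}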
From a63a5cf84dac7a23a57c800eea5734701e7d3c04 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Fri, 1 Apr 2011 21:02:31 +0200
Subject: dm: improve block integrity support

The current block integrity (DIF/DIX) support in DM is verifying that
all devices' integrity profiles match during DM device resume (which
is past the point of no return). To some degree that is unavoidable
(stacked DM devices force this late checking). But for most DM
devices (which aren't stacking on other DM devices) the ideal time to
verify all integrity profiles match is during table load.

Introduce the notion of an "initialized" integrity profile: a profile
that was blk_integrity_register()'d with a non-NULL 'blk_integrity'
template. Add blk_integrity_is_initialized() to allow checking if a
profile was initialized.

Update DM integrity support to:
- check all devices with _initialized_ integrity profiles match
  during table load; uninitialized profiles (e.g. for underlying DM
  device(s) of a stacked DM device) are ignored.
- disallow a table load that would result in an integrity profile
  that conflicts with a DM device's existing (in-use) integrity
  profile
- avoid clearing an existing integrity profile
- validate all integrity profiles match during resume; but if they
  don't all we can do is report the mismatch (during resume we're
  past the point of no return)

Signed-off-by: Mike Snitzer
Cc: Martin K. Petersen
Signed-off-by: Jens Axboe

diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 54bcba6..129b9e2 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -30,6 +30,8 @@

 static struct kmem_cache *integrity_cachep;

+static const char *bi_unsupported_name = "unsupported";
+
 /**
  * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
  * @q:		request queue
@@ -358,6 +360,14 @@ static struct kobj_type integrity_ktype = {
	.release	= blk_integrity_release,
 };

+bool blk_integrity_is_initialized(struct gendisk *disk)
+{
+	struct blk_integrity *bi = blk_get_integrity(disk);
+
+	return (bi && bi->name && strcmp(bi->name, bi_unsupported_name) != 0);
+}
+EXPORT_SYMBOL(blk_integrity_is_initialized);
+
 /**
  * blk_integrity_register - Register a gendisk as being integrity-capable
  * @disk:	struct gendisk pointer to make integrity-aware
@@ -407,7 +417,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
		bi->get_tag_fn = template->get_tag_fn;
		bi->tag_size = template->tag_size;
	} else
-		bi->name = "unsupported";
+		bi->name = bi_unsupported_name;

	return 0;
 }
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 416d4e2..cb8380c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -927,20 +927,80 @@ static int dm_table_build_index(struct dm_table *t)
 }

 /*
+ * Get a disk whose integrity profile reflects the table's profile.
+ * If %match_all is true, all devices' profiles must match.
+ * If %match_all is false, all devices must at least have an
+ * allocated integrity profile; but uninitialized is ok.
+ * Returns NULL if integrity support was inconsistent or unavailable.
+ */
+static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t,
+						    bool match_all)
+{
+	struct list_head *devices = dm_table_get_devices(t);
+	struct dm_dev_internal *dd = NULL;
+	struct gendisk *prev_disk = NULL, *template_disk = NULL;
+
+	list_for_each_entry(dd, devices, list) {
+		template_disk = dd->dm_dev.bdev->bd_disk;
+		if (!blk_get_integrity(template_disk))
+			goto no_integrity;
+		if (!match_all && !blk_integrity_is_initialized(template_disk))
+			continue; /* skip uninitialized profiles */
+		else if (prev_disk &&
+			 blk_integrity_compare(prev_disk, template_disk) < 0)
+			goto no_integrity;
+		prev_disk = template_disk;
+	}
+
+	return template_disk;
+
+no_integrity:
+	if (prev_disk)
+		DMWARN("%s: integrity not set: %s and %s profile mismatch",
+		       dm_device_name(t->md),
+		       prev_disk->disk_name,
+		       template_disk->disk_name);
+	return NULL;
+}
+
+/*
  * Register the mapped device for blk_integrity support if
- * the underlying devices support it.
+ * the underlying devices have an integrity profile. But all devices
+ * may not have matching profiles (checking all devices isn't reliable
+ * during table load because this table may use other DM device(s) which
+ * must be resumed before they will have an initialized integrity profile).
+ * Stacked DM devices force a 2-stage integrity profile validation:
+ * 1 - during load, validate all initialized integrity profiles match
+ * 2 - during resume, validate all integrity profiles match
  */
 static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md)
 {
-	struct list_head *devices = dm_table_get_devices(t);
-	struct dm_dev_internal *dd;
+	struct gendisk *template_disk = NULL;

-	list_for_each_entry(dd, devices, list)
-		if (bdev_get_integrity(dd->dm_dev.bdev)) {
-			t->integrity_supported = 1;
-			return blk_integrity_register(dm_disk(md), NULL);
-		}
+	template_disk = dm_table_get_integrity_disk(t, false);
+	if (!template_disk)
+		return 0;
+	if (!blk_integrity_is_initialized(dm_disk(md))) {
+		t->integrity_supported = 1;
+		return blk_integrity_register(dm_disk(md), NULL);
+	}
+
+	/*
+	 * If DM device already has an initialized integrity
+	 * profile the new profile should not conflict.
+	 */
+	if (blk_integrity_is_initialized(template_disk) &&
+	    blk_integrity_compare(dm_disk(md), template_disk) < 0) {
+		DMWARN("%s: conflict with existing integrity profile: "
+		       "%s profile mismatch",
+		       dm_device_name(t->md),
+		       template_disk->disk_name);
+		return 1;
+	}
+
+	/* Preserve existing initialized integrity profile */
+	t->integrity_supported = 1;

	return 0;
 }
@@ -1094,41 +1154,27 @@ combine_limits:

 /*
  * Set the integrity profile for this device if all devices used have
- * matching profiles.
+ * matching profiles. We're quite deep in the resume path but still
+ * don't know if all devices (particularly DM devices this device
+ * may be stacked on) have matching profiles. Even if the profiles
+ * don't match we have no way to fail (to resume) at this point.
  */
 static void dm_table_set_integrity(struct dm_table *t)
 {
-	struct list_head *devices = dm_table_get_devices(t);
-	struct dm_dev_internal *prev = NULL, *dd = NULL;
+	struct gendisk *template_disk = NULL;

	if (!blk_get_integrity(dm_disk(t->md)))
		return;

-	list_for_each_entry(dd, devices, list) {
-		if (prev &&
-		    blk_integrity_compare(prev->dm_dev.bdev->bd_disk,
-					  dd->dm_dev.bdev->bd_disk) < 0) {
-			DMWARN("%s: integrity not set: %s and %s mismatch",
-			       dm_device_name(t->md),
-			       prev->dm_dev.bdev->bd_disk->disk_name,
-			       dd->dm_dev.bdev->bd_disk->disk_name);
-			goto no_integrity;
-		}
-		prev = dd;
+	template_disk = dm_table_get_integrity_disk(t, true);
+	if (!template_disk &&
+	    blk_integrity_is_initialized(dm_disk(t->md))) {
+		DMWARN("%s: device no longer has a valid integrity profile",
+		       dm_device_name(t->md));
+		return;
	}
-
-	if (!prev || !bdev_get_integrity(prev->dm_dev.bdev))
-		goto no_integrity;
-
	blk_integrity_register(dm_disk(t->md),
-			       bdev_get_integrity(prev->dm_dev.bdev));
-
-	return;
-
-no_integrity:
-	blk_integrity_register(dm_disk(t->md), NULL);
-
-	return;
+			       blk_get_integrity(template_disk));
 }

 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 16a902f..32176cc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1206,6 +1206,7 @@ struct blk_integrity {
	struct kobject		kobj;
 };

+extern bool blk_integrity_is_initialized(struct gendisk *);
 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
 extern void blk_integrity_unregister(struct gendisk *);
 extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
@@ -1262,6 +1263,7 @@ queue_max_integrity_segments(struct request_queue *q)
 #define queue_max_integrity_segments(a)	(0)
 #define blk_integrity_merge_rq(a, b, c)	(0)
 #define blk_integrity_merge_bio(a, b, c)	(0)
+#define blk_integrity_is_initialized(a)	(0)

 #endif /* CONFIG_BLK_DEV_INTEGRITY */
--
cgit v0.10.2
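The commit distinguishes three per-disk integrity states, which the two-stage validation relies on. A small illustrative helper (the name is ours, not from the patch) makes the distinction explicit:

	/*
	 * A gendisk is in one of three integrity states:
	 *   1. no profile:    blk_get_integrity() returns NULL
	 *   2. uninitialized: registered with a NULL template, so the
	 *      profile name is still "unsupported" (see hunk above)
	 *   3. initialized:   registered with a non-NULL template
	 * Only state 3 can be strictly matched at table-load time.
	 */
	static bool ready_for_strict_match(struct gendisk *disk)
	{
		return blk_get_integrity(disk) &&
		       blk_integrity_is_initialized(disk);
	}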
From f83e826181f7f8fb152e4190d03854fc3a5dd040 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Mon, 4 Apr 2011 00:15:02 +0200
Subject: block: fix request sorting at unplug

A comparison function for list_sort() must be anticommutative;
otherwise it is not a sort in the ordinary sense. Fortunately,
list_sort() only ever checks ((*cmp)(priv, a, b) <= 0) and does not
distinguish negative from zero, so the comparison function can
implement less-or-equal instead of a full three-way comparison.

Signed-off-by: Konstantin Khlebnikov
Signed-off-by: Jens Axboe

diff --git a/block/blk-core.c b/block/blk-core.c
index 4fdf895..725091d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2665,7 +2665,7 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
	struct request *rqa = container_of(a, struct request, queuelist);
	struct request *rqb = container_of(b, struct request, queuelist);

-	return !(rqa->q == rqb->q);
+	return !(rqa->q <= rqb->q);
 }

 static void flush_plug_list(struct blk_plug *plug)
--
cgit v0.10.2
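The defect is easiest to see outside the kernel. A minimal, runnable user-space sketch, with plain longs standing in for the struct request_queue pointers being compared:

	#include <stdio.h>

	/* list_sort() only tests cmp(a, b) <= 0 ("a sorts before or with
	 * b"), so encoding less-or-equal as 0 and greater as 1 suffices;
	 * but the predicate must stay anticommutative for unequal keys. */
	static int cmp_broken(long a, long b) { return !(a == b); } /* old */
	static int cmp_fixed(long a, long b)  { return !(a <= b); } /* new */

	int main(void)
	{
		long a = 1, b = 2;

		/* broken: both orders return 1, i.e. each element claims
		 * the other must come first -- no consistent order exists */
		printf("broken: cmp(a,b)=%d cmp(b,a)=%d\n",
		       cmp_broken(a, b), cmp_broken(b, a));

		/* fixed: exactly one order is accepted */
		printf("fixed:  cmp(a,b)=%d cmp(b,a)=%d\n",
		       cmp_fixed(a, b), cmp_fixed(b, a));
		return 0;
	}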
From 782b86e2656762382ae1c2686d8d5c91f7d5eacf Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 5 Apr 2011 03:29:57 +0200
Subject: ide: always ensure that blk_delay_queue() is called if we have pending IO

Just because we are not requeuing a request does not mean that some
aren't pending. So always issue a blk_delay_queue() if either we are
requeuing OR there's pending IO. This fixes a boot problem for some
IDE boxes.

Signed-off-by: Jens Axboe

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ca27d30..177db6d 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -430,6 +430,26 @@ static inline void ide_unlock_host(struct ide_host *host)
	}
 }

+static void __ide_requeue_and_plug(struct request_queue *q, struct request *rq)
+{
+	if (rq)
+		blk_requeue_request(q, rq);
+	if (rq || blk_peek_request(q)) {
+		/* Use 3ms as that was the old plug delay */
+		blk_delay_queue(q, 3);
+	}
+}
+
+void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
+{
+	struct request_queue *q = drive->queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__ide_requeue_and_plug(q, rq);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
 /*
  * Issue a new request to a device.
  */
@@ -550,27 +570,7 @@ plug_device:
		ide_unlock_host(host);
 plug_device_2:
	spin_lock_irq(q->queue_lock);
-
-	if (rq) {
-		blk_requeue_request(q, rq);
-		blk_delay_queue(q, queue_run_ms);
-	}
-}
-
-void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
-{
-	struct request_queue *q = drive->queue;
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-
-	if (rq)
-		blk_requeue_request(q, rq);
-
-	spin_unlock_irqrestore(q->queue_lock, flags);
-
-	/* Use 3ms as that was the old plug delay */
-	blk_delay_queue(q, 3);
+	__ide_requeue_and_plug(q, rq);
 }

 static int drive_is_ready(ide_drive_t *drive)
--
cgit v0.10.2
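Condensed from __ide_requeue_and_plug() above, with comments added: the queue is kicked whenever anything is left to dispatch, not only when this call itself requeued a request.

	if (rq)
		blk_requeue_request(q, rq);	/* put the request back first */
	if (rq || blk_peek_request(q)) {
		/* we requeued, or other requests are pending: schedule
		 * another queue run; 3ms was the old plug delay */
		blk_delay_queue(q, 3);
	}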