From 6da127ad0918f93ea93678dad62ce15ffed18797 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Fri, 11 Jan 2008 10:09:43 +0100 Subject: blktrace: Add blktrace ioctls to SCSI generic devices Since the SCSI layer uses the request queues from the block layer, blktrace can also be used to trace the requests to all SCSI devices (like SCSI tape drives), not only disks. The only missing part is the ioctl interface to start and stop tracing. This patch adds the SETUP, START, STOP and TEARDOWN ioctls from blktrace to the sg device files. With this change, blktrace can be used for SCSI devices like for disks, e.g.: blktrace -d /dev/sg1 -o - | blkparse -i - Signed-off-by: Christof Schmitt Signed-off-by: Jens Axboe diff --git a/block/blktrace.c b/block/blktrace.c index 9b4da4a..568588c 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -235,7 +235,7 @@ static void blk_trace_cleanup(struct blk_trace *bt) kfree(bt); } -static int blk_trace_remove(struct request_queue *q) +int blk_trace_remove(struct request_queue *q) { struct blk_trace *bt; @@ -249,6 +249,7 @@ static int blk_trace_remove(struct request_queue *q) return 0; } +EXPORT_SYMBOL_GPL(blk_trace_remove); static int blk_dropped_open(struct inode *inode, struct file *filp) { @@ -316,18 +317,17 @@ static struct rchan_callbacks blk_relay_callbacks = { /* * Setup everything required to start tracing */ -int do_blk_trace_setup(struct request_queue *q, struct block_device *bdev, +int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_user_trace_setup *buts) { struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; - char b[BDEVNAME_SIZE]; int ret, i; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; - strcpy(buts->name, bdevname(bdev, b)); + strcpy(buts->name, name); /* * some device names have larger paths - convert the slashes @@ -352,7 +352,7 @@ int do_blk_trace_setup(struct request_queue *q, struct block_device *bdev, goto err; bt->dir = dir; - bt->dev = bdev->bd_dev; + bt->dev = dev; atomic_set(&bt->dropped, 0); ret = -EIO; @@ -399,8 +399,8 @@ err: return ret; } -static int blk_trace_setup(struct request_queue *q, struct block_device *bdev, - char __user *arg) +int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + char __user *arg) { struct blk_user_trace_setup buts; int ret; @@ -409,7 +409,7 @@ static int blk_trace_setup(struct request_queue *q, struct block_device *bdev, if (ret) return -EFAULT; - ret = do_blk_trace_setup(q, bdev, &buts); + ret = do_blk_trace_setup(q, name, dev, &buts); if (ret) return ret; @@ -418,8 +418,9 @@ static int blk_trace_setup(struct request_queue *q, struct block_device *bdev, return 0; } +EXPORT_SYMBOL_GPL(blk_trace_setup); -static int blk_trace_startstop(struct request_queue *q, int start) +int blk_trace_startstop(struct request_queue *q, int start) { struct blk_trace *bt; int ret; @@ -452,6 +453,7 @@ static int blk_trace_startstop(struct request_queue *q, int start) return ret; } +EXPORT_SYMBOL_GPL(blk_trace_startstop); /** * blk_trace_ioctl: - handle the ioctls associated with tracing @@ -464,6 +466,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) { struct request_queue *q; int ret, start = 0; + char b[BDEVNAME_SIZE]; q = bdev_get_queue(bdev); if (!q) @@ -473,7 +476,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: - ret = blk_trace_setup(q, bdev, arg); + strcpy(b, bdevname(bdev, b)); + ret = blk_trace_setup(q, b, bdev->bd_dev, arg); break; case BLKTRACESTART: start = 1; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index cae0a85..b733732 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -545,6 +545,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) struct blk_user_trace_setup buts; struct compat_blk_user_trace_setup cbuts; struct request_queue *q; + char b[BDEVNAME_SIZE]; int ret; q = bdev_get_queue(bdev); @@ -554,6 +555,8 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) if (copy_from_user(&cbuts, arg, sizeof(cbuts))) return -EFAULT; + strcpy(b, bdevname(bdev, b)); + buts = (struct blk_user_trace_setup) { .act_mask = cbuts.act_mask, .buf_size = cbuts.buf_size, @@ -565,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) memcpy(&buts.name, &cbuts.name, 32); mutex_lock(&bdev->bd_mutex); - ret = do_blk_trace_setup(q, bdev, &buts); + ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); mutex_unlock(&bdev->bd_mutex); if (ret) return ret; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 17216b7..aba28f3 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -48,6 +48,7 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #include #include #include +#include #include "scsi.h" #include @@ -1067,6 +1068,17 @@ sg_ioctl(struct inode *inode, struct file *filp, case BLKSECTGET: return put_user(sdp->device->request_queue->max_sectors * 512, ip); + case BLKTRACESETUP: + return blk_trace_setup(sdp->device->request_queue, + sdp->disk->disk_name, + sdp->device->sdev_gendev.devt, + (char *)arg); + case BLKTRACESTART: + return blk_trace_startstop(sdp->device->request_queue, 1); + case BLKTRACESTOP: + return blk_trace_startstop(sdp->device->request_queue, 0); + case BLKTRACETEARDOWN: + return blk_trace_remove(sdp->device->request_queue); default: if (read_only) return -EPERM; /* don't know so take safe approach */ diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7e11d23..06dadba 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -148,7 +148,7 @@ extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); extern int do_blk_trace_setup(struct request_queue *q, - struct block_device *bdev, struct blk_user_trace_setup *buts); + char *name, dev_t dev, struct blk_user_trace_setup *buts); /** @@ -282,6 +282,11 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); } +extern int blk_trace_setup(request_queue_t *q, char *name, dev_t dev, + char __user *arg); +extern int blk_trace_startstop(request_queue_t *q, int start); +extern int blk_trace_remove(request_queue_t *q); + #else /* !CONFIG_BLK_DEV_IO_TRACE */ #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) #define blk_trace_shutdown(q) do { } while (0) @@ -290,7 +295,10 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, #define blk_add_trace_generic(q, rq, rw, what) do { } while (0) #define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) #define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) -#define do_blk_trace_setup(q, bdev, buts) (-ENOTTY) +#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) +#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) +#define blk_trace_startstop(q, start) (-ENOTTY) +#define blk_trace_remove(q) (-ENOTTY) #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* __KERNEL__ */ #endif -- cgit v0.10.2 From 482eb689169948e9f4966fbae6be4d6bc0bfa818 Mon Sep 17 00:00:00 2001 From: Pete Wyckoff Date: Tue, 1 Jan 2008 10:23:02 -0500 Subject: block: allow queue dma_alignment of zero Let queue_dma_alignment return 0 if it was specifically set to 0. This permits devices with no particular alignment restrictions to use arbitrary user space buffers without copying. Signed-off-by: Pete Wyckoff Signed-off-by: Jens Axboe diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 49b7a4c..c7a3ab5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -836,12 +836,7 @@ static inline int bdev_hardsect_size(struct block_device *bdev) static inline int queue_dma_alignment(struct request_queue *q) { - int retval = 511; - - if (q && q->dma_alignment) - retval = q->dma_alignment; - - return retval; + return q ? q->dma_alignment : 511; } /* assumes size > 256 */ -- cgit v0.10.2 From 5d84070ee0a433620c57e85dac7f82faaec5fbb3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Jan 2008 12:44:44 +0100 Subject: __bio_clone: don't calculate hw/phys segment counts If the users sets a new ->bi_bdev on the bio after __bio_clone() has returned it, the "segment counts valid" flag still remains even though it may be different with the new target. So don't calculate segment counts in __bio_clone(). Signed-off-by: Jens Axboe diff --git a/fs/bio.c b/fs/bio.c index d59ddbf..242e409 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -248,11 +248,13 @@ inline int bio_hw_segments(struct request_queue *q, struct bio *bio) */ void __bio_clone(struct bio *bio, struct bio *bio_src) { - struct request_queue *q = bdev_get_queue(bio_src->bi_bdev); - memcpy(bio->bi_io_vec, bio_src->bi_io_vec, bio_src->bi_max_vecs * sizeof(struct bio_vec)); + /* + * most users will be overriding ->bi_bdev with a new target, + * so we don't set nor calculate new physical/hw segment counts here + */ bio->bi_sector = bio_src->bi_sector; bio->bi_bdev = bio_src->bi_bdev; bio->bi_flags |= 1 << BIO_CLONED; @@ -260,8 +262,6 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) bio->bi_vcnt = bio_src->bi_vcnt; bio->bi_size = bio_src->bi_size; bio->bi_idx = bio_src->bi_idx; - bio_phys_segments(q, bio); - bio_hw_segments(q, bio); } /** -- cgit v0.10.2 From fa0ccd837e3dddb44c7db2f128a8bb7e4eabc21a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 10 Jan 2008 11:30:36 -0600 Subject: block: implement drain buffers These DMA drain buffer implementations in drivers are pretty horrible to do in terms of manipulating the scatterlist. Plus they're being done at least in drivers/ide and drivers/ata, so we now have code duplication. The one use case for this, as I understand it is AHCI controllers doing PIO mode to mmc devices but translating this to DMA at the controller level. So, what about adding a callback to the block layer that permits the adding of the drain buffer for the problem devices. The idea is that you'd do this in slave_configure after you find one of these devices. The beauty of doing it in the block layer is that it quietly adds the drain buffer to the end of the sg list, so it automatically gets mapped (and unmapped) without anything unusual having to be done to the scatterlist in driver/scsi or drivers/ata and without any alteration to the transfer length. Signed-off-by: James Bottomley Signed-off-by: Jens Axboe diff --git a/block/elevator.c b/block/elevator.c index f9736fb..8cd5775 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -741,7 +741,21 @@ struct request *elv_next_request(struct request_queue *q) q->boundary_rq = NULL; } - if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) + if (rq->cmd_flags & REQ_DONTPREP) + break; + + if (q->dma_drain_size && rq->data_len) { + /* + * make sure space for the drain appears we + * know we can do this because max_hw_segments + * has been adjusted to be one fewer than the + * device can handle + */ + rq->nr_phys_segments++; + rq->nr_hw_segments++; + } + + if (!q->prep_rq_fn) break; ret = q->prep_rq_fn(q, rq); @@ -754,6 +768,16 @@ struct request *elv_next_request(struct request_queue *q) * avoid resource deadlock. REQ_STARTED will * prevent other fs requests from passing this one. */ + if (q->dma_drain_size && rq->data_len && + !(rq->cmd_flags & REQ_DONTPREP)) { + /* + * remove the space for the drain we added + * so that we don't add it again + */ + --rq->nr_phys_segments; + --rq->nr_hw_segments; + } + rq = NULL; break; } else if (ret == BLKPREP_KILL) { diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 3d0422f..768987d 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -726,6 +726,45 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) EXPORT_SYMBOL(blk_queue_stack_limits); /** + * blk_queue_dma_drain - Set up a drain buffer for excess dma. + * + * @q: the request queue for the device + * @buf: physically contiguous buffer + * @size: size of the buffer in bytes + * + * Some devices have excess DMA problems and can't simply discard (or + * zero fill) the unwanted piece of the transfer. They have to have a + * real area of memory to transfer it into. The use case for this is + * ATAPI devices in DMA mode. If the packet command causes a transfer + * bigger than the transfer size some HBAs will lock up if there + * aren't DMA elements to contain the excess transfer. What this API + * does is adjust the queue so that the buf is always appended + * silently to the scatterlist. + * + * Note: This routine adjusts max_hw_segments to make room for + * appending the drain buffer. If you call + * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after + * calling this routine, you must set the limit to one fewer than your + * device can support otherwise there won't be room for the drain + * buffer. + */ +int blk_queue_dma_drain(struct request_queue *q, void *buf, + unsigned int size) +{ + if (q->max_hw_segments < 2 || q->max_phys_segments < 2) + return -EINVAL; + /* make room for appending the drain */ + --q->max_hw_segments; + --q->max_phys_segments; + q->dma_drain_buffer = buf; + q->dma_drain_size = size; + + return 0; +} + +EXPORT_SYMBOL_GPL(blk_queue_dma_drain); + +/** * blk_queue_segment_boundary - set boundary rules for segment merging * @q: the request queue for the device * @mask: the memory boundary mask @@ -1379,6 +1418,16 @@ new_segment: bvprv = bvec; } /* segments in rq */ + if (q->dma_drain_size) { + sg->page_link &= ~0x02; + sg = sg_next(sg); + sg_set_page(sg, virt_to_page(q->dma_drain_buffer), + q->dma_drain_size, + ((unsigned long)q->dma_drain_buffer) & + (PAGE_SIZE - 1)); + nsegs++; + } + if (sg) sg_mark_end(sg); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c7a3ab5..e542c8f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -429,6 +429,8 @@ struct request_queue unsigned int max_segment_size; unsigned long seg_boundary_mask; + void *dma_drain_buffer; + unsigned int dma_drain_size; unsigned int dma_alignment; struct blk_queue_tag *queue_tags; @@ -760,6 +762,8 @@ extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); +extern int blk_queue_dma_drain(struct request_queue *q, void *buf, + unsigned int size); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); -- cgit v0.10.2