From f4aa4c7bbac6c4afdd4adccf90898c1a3685396d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 5 May 2015 19:49:54 +0800 Subject: block: loop: convert to per-device workqueue Documentation/workqueue.txt: If there is dependency among multiple work items used during memory reclaim, they should be queued to separate wq each with WQ_MEM_RECLAIM. Loop devices can be stacked, so we have to convert to per-device workqueue. One example is Fedora live CD. Fixes: b5dd2f6047ca108001328aac0e8588edd15f1778 Cc: stable@vger.kernel.org (v4.0) Cc: Justin M. Forbes Signed-off-by: Ming Lei Acked-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ae3fcb4..3dc1598 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -86,8 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex); static int max_part; static int part_shift; -static struct workqueue_struct *loop_wq; - static int transfer_xor(struct loop_device *lo, int cmd, struct page *raw_page, unsigned raw_off, struct page *loop_page, unsigned loop_off, @@ -725,6 +723,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) goto out_putf; + error = -ENOMEM; + lo->wq = alloc_workqueue("kloopd%d", + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0, + lo->lo_number); + if (!lo->wq) + goto out_putf; error = 0; @@ -872,6 +876,8 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; + destroy_workqueue(lo->wq); + lo->wq = NULL; mutex_unlock(&lo->lo_ctl_mutex); /* * Need not hold lo_ctl_mutex to fput backing file. @@ -1425,9 +1431,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + struct loop_device *lo = cmd->rq->q->queuedata; blk_mq_start_request(bd->rq); + if (lo->lo_state != Lo_bound) + return -EIO; + if (cmd->rq->cmd_flags & REQ_WRITE) { struct loop_device *lo = cmd->rq->q->queuedata; bool need_sched = true; @@ -1441,9 +1451,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irq(&lo->lo_lock); if (need_sched) - queue_work(loop_wq, &lo->write_work); + queue_work(lo->wq, &lo->write_work); } else { - queue_work(loop_wq, &cmd->read_work); + queue_work(lo->wq, &cmd->read_work); } return BLK_MQ_RQ_QUEUE_OK; @@ -1455,9 +1465,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd) struct loop_device *lo = cmd->rq->q->queuedata; int ret = -EIO; - if (lo->lo_state != Lo_bound) - goto failed; - if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) goto failed; @@ -1806,13 +1813,6 @@ static int __init loop_init(void) goto misc_out; } - loop_wq = alloc_workqueue("kloopd", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0); - if (!loop_wq) { - err = -ENOMEM; - goto misc_out; - } - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1850,8 +1850,6 @@ static void __exit loop_exit(void) blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); - destroy_workqueue(loop_wq); - misc_deregister(&loop_misc); } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 301c27f..49564ed 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -54,6 +54,7 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; + struct workqueue_struct *wq; struct list_head write_cmd_head; struct work_struct write_work; bool write_started; -- cgit v0.10.2 From 4d4e41aef9429872ea3b105e83426941f7185ab6 Mon Sep 
17 00:00:00 2001 From: Ming Lei Date: Tue, 5 May 2015 19:49:55 +0800 Subject: block: loop: avoiding too many pending per work I/O If there is too much pending per-work I/O, too many high-priority worker threads can be generated, and system performance can be affected. This patch limits the max_active parameter of the workqueue to 16. This patch fixes a Fedora 22 live booting performance regression when it is booted from squashfs over dm based on loop, and the following reasons look related to the problem: - unlike other filesystems (such as ext4), squashfs is a bit special, and I observed that increasing I/O jobs to access a file in squashfs improves I/O performance only a little, but it can make a big difference for ext4 - nested loop: both squashfs.img and ext3fs.img are mounted as loop block devices, and ext3fs.img is inside the squashfs - during booting, lots of tasks may run concurrently Fixes: b5dd2f6047ca108001328aac0e8588edd15f1778 Cc: stable@vger.kernel.org (v4.0) Cc: Justin M. Forbes Signed-off-by: Ming Lei Acked-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3dc1598..1bee523 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -725,7 +725,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, goto out_putf; error = -ENOMEM; lo->wq = alloc_workqueue("kloopd%d", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0, + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16, lo->lo_number); if (!lo->wq) goto out_putf; -- cgit v0.10.2 From 5aea3288d3706e812a8d6c4078669f38b7b72bda Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Tue, 17 Feb 2015 17:40:21 +0100 Subject: cciss: remove duplicate entries from board_type struct and devices not supported by this driver from unresettable list Signed-off-by: Tomas Henzl Signed-off-by: Jens Axboe diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ff20f19..4849822 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -139,8 +139,6 @@ static struct board_type products[] = { {0x3214103C, "Smart Array E200i", &SA5_access}, {0x3215103C, "Smart Array E200i", &SA5_access}, {0x3237103C, "Smart Array E500", &SA5_access}, - {0x3223103C, "Smart Array P800", &SA5_access}, - {0x3234103C, "Smart Array P400", &SA5_access}, {0x323D103C, "Smart Array P700m", &SA5_access}, }; @@ -574,8 +572,6 @@ static void cciss_procinit(ctlr_info_t *h) /* List of controllers which cannot be hard reset on kexec with reset_devices */ static u32 unresettable_controller[] = { - 0x324a103C, /* Smart Array P712m */ - 0x324b103C, /* SmartArray P711m */ 0x3223103C, /* Smart Array P800 */ 0x3234103C, /* Smart Array P400 */ 0x3235103C, /* Smart Array P400i */ -- cgit v0.10.2 From 8a0ee3b52df73c7b89376b03c789232b78dd2aff Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Tue, 17 Feb 2015 17:40:22 +0100 Subject: cciss: correct the non-resettable board list The hpsa driver carries a more recent version, copy the table from there.
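For context, the reason these board-ID tables matter is that the kdump/kexec reset path looks up the controller's PCI board ID in them before attempting a reset (the ctlr_is_resettable() check further down does exactly this). A minimal sketch of such a lookup, assuming the u32 table layout visible in the diffs; the helper names here are illustrative, not the literal cciss functions:

#include <linux/kernel.h>
#include <linux/types.h>

/* return true if board_id occurs in the given blacklist table */
static bool board_id_listed(u32 board_id, const u32 *table, int nentries)
{
	int i;

	/* a linear scan is fine: these tables hold only a handful of entries */
	for (i = 0; i < nentries; i++)
		if (table[i] == board_id)
			return true;
	return false;
}

/* sketch: a controller is hard-resettable unless it is blacklisted */
static int ctlr_is_hard_resettable_sketch(u32 board_id)
{
	return !board_id_listed(board_id, unresettable_controller,
				ARRAY_SIZE(unresettable_controller));
}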
Signed-off-by: Tomas Henzl Signed-off-by: Jens Axboe diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 4849822..0422c47 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -582,12 +582,32 @@ static u32 unresettable_controller[] = { 0x3215103C, /* Smart Array E200i */ 0x3237103C, /* Smart Array E500 */ 0x323D103C, /* Smart Array P700m */ + 0x40800E11, /* Smart Array 5i */ 0x409C0E11, /* Smart Array 6400 */ 0x409D0E11, /* Smart Array 6400 EM */ + 0x40700E11, /* Smart Array 5300 */ + 0x40820E11, /* Smart Array 532 */ + 0x40830E11, /* Smart Array 5312 */ + 0x409A0E11, /* Smart Array 641 */ + 0x409B0E11, /* Smart Array 642 */ + 0x40910E11, /* Smart Array 6i */ }; /* List of controllers which cannot even be soft reset */ static u32 soft_unresettable_controller[] = { + 0x40800E11, /* Smart Array 5i */ + 0x40700E11, /* Smart Array 5300 */ + 0x40820E11, /* Smart Array 532 */ + 0x40830E11, /* Smart Array 5312 */ + 0x409A0E11, /* Smart Array 641 */ + 0x409B0E11, /* Smart Array 642 */ + 0x40910E11, /* Smart Array 6i */ + /* Exclude 640x boards. These are two pci devices in one slot + * which share a battery backed cache module. One controls the + * cache, the other accesses the cache through the one that controls + * it. If we reset the one controlling the cache, the other will + * likely not be happy. Just forbid resetting this conjoined mess. + */ 0x409C0E11, /* Smart Array 6400 */ 0x409D0E11, /* Smart Array 6400 EM */ }; @@ -4663,8 +4683,7 @@ static int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) */ cciss_lookup_board_id(pdev, &board_id); if (!ctlr_is_resettable(board_id)) { - dev_warn(&pdev->dev, "Cannot reset Smart Array 640x " - "due to shared cache module."); + dev_warn(&pdev->dev, "Controller not resettable\n"); return -ENODEV; } -- cgit v0.10.2 From cddcd72bcec3b1dc9cef7f17d724a7fe42d64cc1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2015 09:38:14 +0200 Subject: nvme: disable irqs in nvme_freeze_queues The queue_lock needs to be taken with irqs disabled. This is mostly due to the old pre blk-mq usage pattern, but we've also picked it up in most of the few places where we use the queue_lock with blk-mq. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 85b8036..00e6419 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2585,9 +2585,9 @@ static void nvme_freeze_queues(struct nvme_dev *dev) list_for_each_entry(ns, &dev->namespaces, list) { blk_mq_freeze_queue_start(ns->queue); - spin_lock(ns->queue->queue_lock); + spin_lock_irq(ns->queue->queue_lock); queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); - spin_unlock(ns->queue->queue_lock); + spin_unlock_irq(ns->queue->queue_lock); blk_mq_cancel_requeue_work(ns->queue); blk_mq_stop_hw_queues(ns->queue); -- cgit v0.10.2 From f8933667953e8e61bb6104f5ca88e32e85656a93 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 May 2015 12:26:23 +0800 Subject: block: loop: don't hold lo_ctl_mutex in lo_open The lo_ctl_mutex is held for running all ioctl handlers, and in some ioctl handlers, ioctl_by_bdev(BLKRRPART) is called for rereading partitions, which requires bd_mutex. 
So it is easy to cause a failure, because trylock(bd_mutex) may fail inside blkdev_reread_part(), as the following lock context shows: blkid or other application: ->open() ->mutex_lock(bd_mutex) ->lo_open() ->mutex_lock(lo_ctl_mutex) losetup(set fd ioctl): ->mutex_lock(lo_ctl_mutex) ->ioctl_by_bdev(BLKRRPART) ->trylock(bd_mutex) This patch tries to eliminate the ABBA lock dependency by removing lo_ctl_mutex from lo_open() with the following approach: 1) make lo_refcnt an atomic_t and avoid acquiring lo_ctl_mutex in lo_open(): - for open vs. add/del loop, there is no problem because of loop_index_mutex - freeze the request queue during clr_fd, so I/O can't arrive until clearing the fd is completed, which has the same effect as holding lo_ctl_mutex in lo_open - both open() and release() have been serialized by bd_mutex already 2) don't hold lo_ctl_mutex for decreasing/checking lo_refcnt in lo_release(); then lo_ctl_mutex is only required for the last release. Reviewed-by: Christoph Hellwig Tested-by: Jarod Wilson Acked-by: Jarod Wilson Signed-off-by: Ming Lei Signed-off-by: Jens Axboe diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1bee523..b3e294e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -831,7 +831,7 @@ static int loop_clr_fd(struct loop_device *lo) * /do something like mkfs/losetup -d causing the losetup -d * command to fail with EBUSY. */ - if (lo->lo_refcnt > 1) { + if (atomic_read(&lo->lo_refcnt) > 1) { lo->lo_flags |= LO_FLAGS_AUTOCLEAR; mutex_unlock(&lo->lo_ctl_mutex); return 0; @@ -840,6 +840,9 @@ static int loop_clr_fd(struct loop_device *lo) if (filp == NULL) return -EINVAL; + /* freeze request queue during the transition */ + blk_mq_freeze_queue(lo->lo_queue); + spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_rundown; lo->lo_backing_file = NULL; @@ -871,6 +874,8 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference.
*/ module_put(THIS_MODULE); + blk_mq_unfreeze_queue(lo->lo_queue); + if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) ioctl_by_bdev(bdev, BLKRRPART, 0); lo->lo_flags = 0; @@ -1330,9 +1335,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode) goto out; } - mutex_lock(&lo->lo_ctl_mutex); - lo->lo_refcnt++; - mutex_unlock(&lo->lo_ctl_mutex); + atomic_inc(&lo->lo_refcnt); out: mutex_unlock(&loop_index_mutex); return err; @@ -1343,11 +1346,10 @@ static void lo_release(struct gendisk *disk, fmode_t mode) struct loop_device *lo = disk->private_data; int err; - mutex_lock(&lo->lo_ctl_mutex); - - if (--lo->lo_refcnt) - goto out; + if (atomic_dec_return(&lo->lo_refcnt)) + return; + mutex_lock(&lo->lo_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1601,6 +1603,7 @@ static int loop_add(struct loop_device **l, int i) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; mutex_init(&lo->lo_ctl_mutex); + atomic_set(&lo->lo_refcnt, 0); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; @@ -1718,7 +1721,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, mutex_unlock(&lo->lo_ctl_mutex); break; } - if (lo->lo_refcnt > 0) { + if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; mutex_unlock(&lo->lo_ctl_mutex); break; diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 49564ed..25e8997 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -28,7 +28,7 @@ struct loop_func_table; struct loop_device { int lo_number; - int lo_refcnt; + atomic_t lo_refcnt; loff_t lo_offset; loff_t lo_sizelimit; int lo_flags; -- cgit v0.10.2 From 06f0e9e68c0d81c7d822a405f6e35686a711c1fe Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 May 2015 12:26:24 +0800 Subject: block: loop: fix another reread part failure loop_clr_fd() can be run piggyback with lo_release(), and under this situation, reread partition may always fail because bd_mutex has been held already. This patch detects the situation by the reference count, and call __blkdev_reread_part() to avoid acquiring the lock again. In the meantime, this patch switches to new kernel APIs of blkdev_reread_part() and __blkdev_reread_part(). Reviewed-by: Christoph Hellwig Tested-by: Jarod Wilson Acked-by: Jarod Wilson Signed-off-by: Jarod Wilson Signed-off-by: Ming Lei Signed-off-by: Jens Axboe diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b3e294e..2b99e34 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -474,6 +474,28 @@ static int loop_flush(struct loop_device *lo) return loop_switch(lo, NULL); } +static void loop_reread_partitions(struct loop_device *lo, + struct block_device *bdev) +{ + int rc; + + /* + * bd_mutex has been held already in release path, so don't + * acquire it if this function is called in such case. + * + * If the reread partition isn't from release path, lo_refcnt + * must be at least one and it can only become zero when the + * current holder is released. + */ + if (!atomic_read(&lo->lo_refcnt)) + rc = __blkdev_reread_part(bdev); + else + rc = blkdev_reread_part(bdev); + if (rc) + pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", + __func__, lo->lo_number, lo->lo_file_name, rc); +} + /* * loop_change_fd switched the backing store of a loopback device to * a new file. 
This is useful for operating system installers to free up @@ -522,7 +544,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, fput(old_file); if (lo->lo_flags & LO_FLAGS_PARTSCAN) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); return 0; out_putf: @@ -759,7 +781,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; if (lo->lo_flags & LO_FLAGS_PARTSCAN) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); /* Grab the block_device to prevent its destruction after we * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev). @@ -877,7 +899,7 @@ static int loop_clr_fd(struct loop_device *lo) blk_mq_unfreeze_queue(lo->lo_queue); if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; @@ -954,7 +976,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_flags |= LO_FLAGS_PARTSCAN; lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - ioctl_by_bdev(lo->lo_device, BLKRRPART, 0); + loop_reread_partitions(lo, lo->lo_device); } lo->lo_encrypt_key_size = info->lo_encrypt_key_size; -- cgit v0.10.2 From 9dcd13795342f51994fc23a4949d31c77919271c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 May 2015 12:26:25 +0800 Subject: block: nbd: convert to blkdev_reread_part() Reviewed-by: Christoph Hellwig Tested-by: Jarod Wilson Acked-by: Jarod Wilson Signed-off-by: Ming Lei Signed-off-by: Jens Axboe diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 83a7ba4..0e385d8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -711,7 +711,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, bdev->bd_inode->i_size = 0; set_capacity(nbd->disk, 0); if (max_part > 0) - ioctl_by_bdev(bdev, BLKRRPART, 0); + blkdev_reread_part(bdev); if (nbd->disconnect) /* user requested, ignore socket errors */ return 0; return nbd->harderror; -- cgit v0.10.2 From 6029a06c88b925467cb43e4b57dcede88f0457eb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 May 2015 12:26:26 +0800 Subject: block: dasd_genhd: convert to blkdev_reread_part Also remove the obsolete comment. Reviewed-by: Christoph Hellwig Tested-by: Jarod Wilson Acked-by: Jarod Wilson Acked-by: Sebastian Ott Signed-off-by: Ming Lei Signed-off-by: Jens Axboe diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 90f39f7..2af4619 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -116,14 +116,11 @@ int dasd_scan_partitions(struct dasd_block *block) rc); return -ENODEV; } - /* - * See fs/partition/check.c:register_disk,rescan_partitions - * Can't call rescan_partitions directly. Use ioctl. - */ - rc = ioctl_by_bdev(bdev, BLKRRPART, 0); + + rc = blkdev_reread_part(bdev); while (rc == -EBUSY && retry > 0) { schedule(); - rc = ioctl_by_bdev(bdev, BLKRRPART, 0); + rc = blkdev_reread_part(bdev); retry--; DBF_DEV_EVENT(DBF_ERR, block->base, "scan partitions error, retry %d rc %d", -- cgit v0.10.2 From a05e578055d7f9ec2d5c4465933eb424c4e8e25b Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Wed, 6 May 2015 12:26:28 +0800 Subject: s390/block/dasd: remove obsolete while -EBUSY loop With the mutex_trylock bit gone from blkdev_reread_part(), the retry logic in dasd_scan_partitions() shouldn't be necessary. 
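All of these conversions follow the same shape: look up the block_device for partition 0 of the gendisk, open it, ask the block layer to rescan the partition table, and log any failure. A minimal sketch of that pattern, using the blkdev_reread_part() API the series switches to (error handling simplified; this is not the literal dasd code, which keeps the bdev open for later use):

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>

static int rescan_disk_partitions(struct gendisk *disk)
{
	struct block_device *bdev;
	int rc;

	/* partition 0 is the whole-disk device */
	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return -ENODEV;

	/* blkdev_get() drops its bdev reference itself on failure */
	if (blkdev_get(bdev, FMODE_READ, NULL) < 0)
		return -ENODEV;

	/* replaces the old ioctl_by_bdev(bdev, BLKRRPART, 0) round trip */
	rc = blkdev_reread_part(bdev);
	if (rc)
		pr_warn("%s: partition scan failed, rc %d\n",
			disk->disk_name, rc);

	blkdev_put(bdev, FMODE_READ);
	return rc;
}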
CC: Christoph Hellwig CC: Jens Axboe CC: Tejun Heo CC: Alexander Viro CC: Markus Pargmann CC: Stefan Weinhuber CC: Stefan Haberland CC: Sebastian Ott CC: Fabian Frederick CC: Ming Lei CC: David Herrmann CC: Andrew Morton CC: Peter Zijlstra CC: nbd-general@lists.sourceforge.net CC: linux-s390@vger.kernel.org Reviewed-by: Christoph Hellwig Acked-by: Sebastian Ott Signed-off-by: Ming Lei Signed-off-by: Jarod Wilson Signed-off-by: Jens Axboe diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 2af4619..ef1d9fb 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -99,9 +99,8 @@ void dasd_gendisk_free(struct dasd_block *block) int dasd_scan_partitions(struct dasd_block *block) { struct block_device *bdev; - int retry, rc; + int rc; - retry = 5; bdev = bdget_disk(block->gdp, 0); if (!bdev) { DBF_DEV_EVENT(DBF_ERR, block->base, "%s", @@ -118,14 +117,9 @@ int dasd_scan_partitions(struct dasd_block *block) } rc = blkdev_reread_part(bdev); - while (rc == -EBUSY && retry > 0) { - schedule(); - rc = blkdev_reread_part(bdev); - retry--; + if (rc) DBF_DEV_EVENT(DBF_ERR, block->base, - "scan partitions error, retry %d rc %d", - retry, rc); - } + "scan partitions error, rc %d", rc); /* * Since the matching blkdev_put call to the blkdev_get in -- cgit v0.10.2 From 6a9270075858a0586bc1a8415263e8d1134550f6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 May 2015 09:54:35 -0600 Subject: loop: remove (now) unused 'out' label gcc, righfully, complains: drivers/block/loop.c:1369:1: warning: label 'out' defined but not used [-Wunused-label] Kill it. Signed-off-by: Jens Axboe diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 2b99e34..1797185 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1388,7 +1388,6 @@ static void lo_release(struct gendisk *disk, fmode_t mode) loop_flush(lo); } -out: mutex_unlock(&lo->lo_ctl_mutex); } -- cgit v0.10.2 From f705f837c58ebe1ea69dfffff4dcc234e2fbc8dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:38 +0200 Subject: nvme: consolidate synchronous command submission helpers Note that we keep the unused timeout argument, but allow callers to pass 0 instead of a timeout if they want the default. This will allow adding a timeout to the pass through path later on. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 00e6419..e81b205 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -991,27 +991,40 @@ static void sync_completion(struct nvme_queue *nvmeq, void *ctx, * Returns 0 on success. 
If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd, - u32 *result, unsigned timeout) +static int __nvme_submit_sync_cmd(struct request_queue *q, + struct nvme_command *cmd, u32 *result, unsigned timeout) { struct sync_cmd_info cmdinfo; - struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = cmd_rq->nvmeq; + struct nvme_cmd_info *cmd_rq; + struct request *req; + int res; + + req = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false); + if (IS_ERR(req)) + return PTR_ERR(req); cmdinfo.task = current; cmdinfo.status = -EINTR; cmd->common.command_id = req->tag; + cmd_rq = blk_mq_rq_to_pdu(req); nvme_set_info(cmd_rq, &cmdinfo, sync_completion); set_current_state(TASK_UNINTERRUPTIBLE); - nvme_submit_cmd(nvmeq, cmd); + nvme_submit_cmd(cmd_rq->nvmeq, cmd); schedule(); if (result) *result = cmdinfo.result; - return cmdinfo.status; + res = cmdinfo.status; + blk_mq_free_request(req); + return res; +} + +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd) +{ + return __nvme_submit_sync_cmd(q, cmd, NULL, 0); } static int nvme_submit_async_admin_req(struct nvme_dev *dev) @@ -1060,41 +1073,6 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev, return nvme_submit_cmd(nvmeq, cmd); } -static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, - u32 *result, unsigned timeout) -{ - int res; - struct request *req; - - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); - if (IS_ERR(req)) - return PTR_ERR(req); - res = nvme_submit_sync_cmd(req, cmd, result, timeout); - blk_mq_free_request(req); - return res; -} - -int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, - u32 *result) -{ - return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT); -} - -int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns, - struct nvme_command *cmd, u32 *result) -{ - int res; - struct request *req; - - req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT), - false); - if (IS_ERR(req)) - return PTR_ERR(req); - res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT); - blk_mq_free_request(req); - return res; -} - static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) { struct nvme_command c; @@ -1103,7 +1081,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) c.delete_queue.opcode = opcode; c.delete_queue.qid = cpu_to_le16(id); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c); } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, @@ -1120,7 +1098,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cq_flags = cpu_to_le16(flags); c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c); } static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, @@ -1137,7 +1115,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, c.create_sq.sq_flags = cpu_to_le16(flags); c.create_sq.cqid = cpu_to_le16(qid); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c); } static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) @@ -1161,7 +1139,7 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, c.identify.prp1 = cpu_to_le64(dma_addr); c.identify.cns = cpu_to_le32(cns); - 
return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c); } int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, @@ -1175,7 +1153,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return nvme_submit_admin_cmd(dev, &c, result); + return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0); } int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, @@ -1189,7 +1167,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - return nvme_submit_admin_cmd(dev, &c, result); + return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0); } /** @@ -1813,7 +1791,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); c.rw.prp2 = cpu_to_le64(iod->first_dma); c.rw.metadata = cpu_to_le64(meta_dma); - status = nvme_submit_io_cmd(dev, ns, &c, NULL); + status = nvme_submit_sync_cmd(ns->queue, &c); unmap: nvme_unmap_user_pages(dev, write, iod); nvme_free_iod(dev, iod); @@ -1869,23 +1847,15 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : ADMIN_TIMEOUT; - if (length != cmd.data_len) + if (length != cmd.data_len) { status = -ENOMEM; - else if (ns) { - struct request *req; - - req = blk_mq_alloc_request(ns->queue, WRITE, - (GFP_KERNEL|__GFP_WAIT), false); - if (IS_ERR(req)) - status = PTR_ERR(req); - else { - status = nvme_submit_sync_cmd(req, &c, &cmd.result, - timeout); - blk_mq_free_request(req); - } - } else - status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout); + goto out; + } + + status = __nvme_submit_sync_cmd(ns ? 
ns->queue : dev->admin_q, &c, + &cmd.result, timeout); +out: if (cmd.data_len) { nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); nvme_free_iod(dev, iod); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 6b736b0..ba1809f 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1053,7 +1053,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, c.common.prp1 = cpu_to_le64(dma_addr); c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_admin_cmd(dev, &c, NULL); + res = nvme_submit_sync_cmd(dev->admin_q, &c); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; } else { @@ -1121,7 +1121,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.prp1 = cpu_to_le64(dma_addr); c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_admin_cmd(dev, &c, NULL); + res = nvme_submit_sync_cmd(dev->admin_q, &c); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; } else { @@ -1609,7 +1609,7 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.cdw10[0] = cpu_to_le32(cdw10); } - nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_unmap; @@ -1971,7 +1971,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.format.nsid = cpu_to_le32(ns->ns_id); c.format.cdw10 = cpu_to_le32(cdw10); - nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_dma; @@ -2139,7 +2139,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_offset += unit_num_blocks; - nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); if (nvme_sc != NVME_SC_SUCCESS) { nvme_unmap_user_pages(dev, (is_write) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE, @@ -2696,7 +2696,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.opcode = nvme_cmd_flush; c.common.nsid = cpu_to_le32(ns->ns_id); - nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out; @@ -2724,8 +2724,7 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, c.common.opcode = nvme_cmd_flush; c.common.nsid = cpu_to_le32(ns->ns_id); - nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL); - + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out; @@ -2932,7 +2931,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8dbd05e..61488b2 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -158,11 +158,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length); void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod); -int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_ns *, - struct nvme_command *, u32 *); -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); -int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, - u32 *result); +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd); int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, dma_addr_t dma_addr); int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, -- cgit v0.10.2 From e75ec752d725b7b612c0b2db1bca50a9e53c0879 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:39 +0200 Subject: nvme: store a struct device pointer in struct nvme_dev Most users want the generic device, so store that in struct nvme_dev instead of the pci_dev. This also happens to be a nice step towards making some code reusable for non-PCI transports. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e81b205..870a926 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -610,17 +610,17 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, req->errors = 0; if (cmd_rq->aborted) - dev_warn(&nvmeq->dev->pci_dev->dev, + dev_warn(nvmeq->dev->dev, "completing aborted command with status:%04x\n", status); if (iod->nents) { - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents, + dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (blk_integrity_rq(req)) { if (!rq_data_dir(req)) nvme_dif_remap(req, nvme_dif_complete); - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1, + dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1, rq_data_dir(req) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); } } @@ -861,7 +861,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (blk_rq_bytes(req) != nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, + dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, dma_dir); goto retry_cmd; } @@ -1192,8 +1192,7 @@ static void nvme_abort_req(struct request *req) if (work_busy(&dev->reset_work)) goto out; list_del_init(&dev->node); - dev_warn(&dev->pci_dev->dev, - "I/O %d QID %d timeout, reset controller\n", + dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); @@ -1362,22 +1361,21 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) { - struct device *dmadev = &dev->pci_dev->dev; struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); if (!nvmeq) return NULL; - nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth), + nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth), &nvmeq->cq_dma_addr, GFP_KERNEL); if (!nvmeq->cqes) goto free_nvmeq; - nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth), + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), &nvmeq->sq_dma_addr, GFP_KERNEL); if (!nvmeq->sq_cmds) goto free_cqdma; - nvmeq->q_dmadev = dmadev; + nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", dev->instance, qid); @@ -1393,7 +1391,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, return nvmeq; free_cqdma: - dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes, + dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); free_nvmeq: kfree(nvmeq); @@ -1465,7 +1463,7 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Device not ready; aborting %s\n", enabled ? 
"initialisation" : "reset"); return -ENODEV; @@ -1515,7 +1513,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev) if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Device shutdown incomplete; abort shutdown\n"); return -ENODEV; } @@ -1558,7 +1556,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1; dev->admin_tagset.reserved_tags = 1; dev->admin_tagset.timeout = ADMIN_TIMEOUT; - dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev); + dev->admin_tagset.numa_node = dev_to_node(dev->dev); dev->admin_tagset.cmd_size = nvme_cmd_size(dev); dev->admin_tagset.driver_data = dev; @@ -1591,14 +1589,14 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12; if (page_shift < dev_page_min) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Minimum device page size (%u) too large for " "host (%u)\n", 1 << dev_page_min, 1 << page_shift); return -ENODEV; } if (page_shift > dev_page_max) { - dev_info(&dev->pci_dev->dev, + dev_info(dev->dev, "Device maximum page size (%u) smaller than " "host (%u); enabling work-around\n", 1 << dev_page_max, 1 << page_shift); @@ -1689,7 +1687,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, sg_mark_end(&sg[i - 1]); iod->nents = count; - nents = dma_map_sg(&dev->pci_dev->dev, sg, count, + nents = dma_map_sg(dev->dev, sg, count, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (!nents) goto free_iod; @@ -1711,7 +1709,7 @@ void nvme_unmap_user_pages(struct nvme_dev *dev, int write, { int i; - dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, + dma_unmap_sg(dev->dev, iod->sg, iod->nents, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); for (i = 0; i < iod->nents; i++) @@ -1762,7 +1760,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) goto unmap; } if (meta_len) { - meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, + meta = dma_alloc_coherent(dev->dev, meta_len, &meta_dma, GFP_KERNEL); if (!meta) { status = -ENOMEM; @@ -1801,7 +1799,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len)) status = -EFAULT; } - dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma); + dma_free_coherent(dev->dev, meta_len, meta, meta_dma); } return status; } @@ -1961,15 +1959,13 @@ static int nvme_revalidate_disk(struct gendisk *disk) u16 old_ms; unsigned short bs; - id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, - GFP_KERNEL); + id = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL); if (!id) { - dev_warn(&dev->pci_dev->dev, "%s: Memory alocation failure\n", - __func__); + dev_warn(dev->dev, "%s: Memory alocation failure\n", __func__); return 0; } if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) { - dev_warn(&dev->pci_dev->dev, + dev_warn(dev->dev, "identify failed ns:%d, setting capacity to 0\n", ns->ns_id); memset(id, 0, sizeof(*id)); @@ -2014,7 +2010,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (dev->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); - dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr); + dma_free_coherent(dev->dev, 4096, id, dma_addr); return 0; } @@ -2041,7 +2037,7 @@ static int nvme_kthread(void *data) if (work_busy(&dev->reset_work)) continue; list_del_init(&dev->node); - dev_warn(&dev->pci_dev->dev, + dev_warn(dev->dev, "Failed status: %x, reset controller\n", readl(&dev->bar->csts)); dev->reset_workfn = 
nvme_reset_failed_dev; @@ -2073,7 +2069,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) { struct nvme_ns *ns; struct gendisk *disk; - int node = dev_to_node(&dev->pci_dev->dev); + int node = dev_to_node(dev->dev); ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) @@ -2156,8 +2152,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) if (status < 0) return status; if (status > 0) { - dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", - status); + dev_err(dev->dev, "Could not set queue count (%d)\n", status); return 0; } return min(result & 0xffff, result >> 16) + 1; @@ -2171,7 +2166,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = dev->queues[0]; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); int result, i, vecs, nr_io_queues, size; nr_io_queues = num_possible_cpus(); @@ -2251,7 +2246,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ static int nvme_dev_add(struct nvme_dev *dev) { - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); int res; unsigned nn, i; struct nvme_id_ctrl *ctrl; @@ -2259,14 +2254,14 @@ static int nvme_dev_add(struct nvme_dev *dev) dma_addr_t dma_addr; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; - mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL); + mem = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL); if (!mem) return -ENOMEM; res = nvme_identify(dev, 0, 1, dma_addr); if (res) { - dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res); - dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr); + dev_err(dev->dev, "Identify Controller failed (%d)\n", res); + dma_free_coherent(dev->dev, 4096, mem, dma_addr); return -EIO; } @@ -2292,12 +2287,12 @@ static int nvme_dev_add(struct nvme_dev *dev) } else dev->max_hw_sectors = max_hw_sectors; } - dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr); + dma_free_coherent(dev->dev, 4096, mem, dma_addr); dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; dev->tagset.timeout = NVME_IO_TIMEOUT; - dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev); + dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; dev->tagset.cmd_size = nvme_cmd_size(dev); @@ -2317,7 +2312,7 @@ static int nvme_dev_map(struct nvme_dev *dev) { u64 cap; int bars, result = -ENOMEM; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); if (pci_enable_device_mem(pdev)) return result; @@ -2331,8 +2326,8 @@ static int nvme_dev_map(struct nvme_dev *dev) if (pci_request_selected_regions(pdev, bars, "nvme")) goto disable_pci; - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) && - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) + if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) goto disable; dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); @@ -2373,19 +2368,21 @@ static int nvme_dev_map(struct nvme_dev *dev) static void nvme_dev_unmap(struct nvme_dev *dev) { - if (dev->pci_dev->msi_enabled) - pci_disable_msi(dev->pci_dev); - else if (dev->pci_dev->msix_enabled) - pci_disable_msix(dev->pci_dev); + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->msi_enabled) + pci_disable_msi(pdev); + else if (pdev->msix_enabled) + pci_disable_msix(pdev); 
if (dev->bar) { iounmap(dev->bar); dev->bar = NULL; - pci_release_regions(dev->pci_dev); + pci_release_regions(pdev); } - if (pci_is_enabled(dev->pci_dev)) - pci_disable_device(dev->pci_dev); + if (pci_is_enabled(pdev)) + pci_disable_device(pdev); } struct nvme_delq_ctx { @@ -2504,7 +2501,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) &worker, "nvme%d", dev->instance); if (IS_ERR(kworker_task)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Failed to create queue del task\n"); for (i = dev->queue_count - 1; i > 0; i--) nvme_disable_queue(dev, i); @@ -2622,14 +2619,13 @@ static void nvme_dev_remove(struct nvme_dev *dev) static int nvme_setup_prp_pools(struct nvme_dev *dev) { - struct device *dmadev = &dev->pci_dev->dev; - dev->prp_page_pool = dma_pool_create("prp list page", dmadev, + dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, PAGE_SIZE, PAGE_SIZE, 0); if (!dev->prp_page_pool) return -ENOMEM; /* Optimisation for I/Os between 4k and 128k */ - dev->prp_small_pool = dma_pool_create("prp list 256", dmadev, + dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev, 256, 256, 0); if (!dev->prp_small_pool) { dma_pool_destroy(dev->prp_page_pool); @@ -2693,7 +2689,7 @@ static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); - pci_dev_put(dev->pci_dev); + put_device(dev->dev); put_device(dev->device); nvme_free_namespaces(dev); nvme_release_instance(dev); @@ -2837,7 +2833,7 @@ static int nvme_dev_start(struct nvme_dev *dev) static int nvme_remove_dead_ctrl(void *arg) { struct nvme_dev *dev = (struct nvme_dev *)arg; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); if (pci_get_drvdata(pdev)) pci_stop_and_remove_bus_device_locked(pdev); @@ -2876,11 +2872,11 @@ static void nvme_dev_reset(struct nvme_dev *dev) { nvme_dev_shutdown(dev); if (nvme_dev_resume(dev)) { - dev_warn(&dev->pci_dev->dev, "Device failed to resume\n"); + dev_warn(dev->dev, "Device failed to resume\n"); kref_get(&dev->kref); if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", dev->instance))) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Failed to start controller remove task\n"); kref_put(&dev->kref, nvme_free_dev); } @@ -2924,7 +2920,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->namespaces); dev->reset_workfn = nvme_reset_failed_dev; INIT_WORK(&dev->reset_work, nvme_reset_workfn); - dev->pci_dev = pci_dev_get(pdev); + dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); if (result) @@ -2954,7 +2950,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) release: nvme_release_instance(dev); put_pci: - pci_dev_put(dev->pci_dev); + put_device(dev->dev); free: kfree(dev->queues); kfree(dev->entry); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index ba1809f..f1c90f2 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -684,7 +684,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, u8 cmdque = 0x01 << 1; u8 fw_offset = sizeof(dev->firmware_rev); - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -728,8 +728,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); out_free: - 
dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out_dma: return res; } @@ -787,7 +786,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int xfer_len; __be32 tmp_id = cpu_to_be32(ns->ns_id); - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -842,7 +841,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, inq_response[6] = 0x00; /* Rsvd */ inq_response[7] = 0x44; /* Designator Length */ - sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor); + sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor); memcpy(&inq_response[12], dev->model, sizeof(dev->model)); sprintf(&inq_response[52], "%04x", tmp_id); memcpy(&inq_response[56], dev->serial, sizeof(dev->serial)); @@ -851,8 +850,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); out_free: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out_dma: return res; } @@ -883,7 +881,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out_mem; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -933,8 +931,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); out_free: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out_dma: kfree(inq_response); out_mem: @@ -1038,8 +1035,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, goto out_mem; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_smart_log), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1077,7 +1073,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), + dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log), mem, dma_addr); out_dma: kfree(log_response); @@ -1106,8 +1102,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out_mem; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_smart_log), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1158,7 +1153,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), + dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log), mem, dma_addr); out_dma: kfree(log_response); @@ -1209,7 +1204,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, else if (llbaa > 0 && len < 
MODE_PAGE_LLBAA_BLK_DES_LEN) return SNTI_INTERNAL_ERROR; - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1246,8 +1241,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, } out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out: return res; } @@ -1494,8 +1488,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, unsigned ps_desired = 0; /* NVMe Controller Identify */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_id_ctrl), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1556,8 +1549,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, if (nvme_sc) res = nvme_sc; out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); out: return res; } @@ -1820,7 +1812,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, */ if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { - mem = dma_alloc_coherent(&dev->pci_dev->dev, + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1845,7 +1837,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, (1 << (id_ns->lbaf[flbas].ds)); } out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); } out: @@ -1928,7 +1920,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1979,8 +1971,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_sc; out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out: return res; } @@ -2485,7 +2476,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, resp_size = READ_CAP_16_RESP_SIZE; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -2514,8 +2505,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, kfree(response); out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out: return res; } @@ -2548,8 +2538,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } else { /* NVMe Controller Identify */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_id_ctrl), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -2600,8 +2589,7 @@ static int 
nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, kfree(response); out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); out: return res; } @@ -2913,7 +2901,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } - range = dma_alloc_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), + range = dma_alloc_coherent(dev->dev, ndesc * sizeof(*range), &dma_addr, GFP_KERNEL); if (!range) goto out; @@ -2934,8 +2922,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); - dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), - range, dma_addr); + dma_free_coherent(dev->dev, ndesc * sizeof(*range), range, dma_addr); out: kfree(plist); return res; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 61488b2..de0e49a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -74,7 +74,7 @@ struct nvme_dev { struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; - struct pci_dev *pci_dev; + struct device *dev; struct dma_pool *prp_page_pool; struct dma_pool *prp_small_pool; int instance; -- cgit v0.10.2 From b90c48d0c11efe373a42a60e66e2ac2a503c287b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:40 +0200 Subject: nvme: split nvme_trans_send_fw_cmd This function handles two totally different opcodes, so split it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index f1c90f2..60415b5 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1554,10 +1554,25 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, return res; } -/* Write Buffer Helper Functions */ -/* Also using this for Format Unit with hdr passed as NULL, and buffer_id, 0 */ +static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 buffer_id) +{ + struct nvme_command c; + int nvme_sc; + int res; + + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_activate_fw; + c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV); + + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); + res = nvme_trans_status_code(hdr, nvme_sc); + if (res) + return res; + return nvme_sc; +} -static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, +static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 opcode, u32 tot_len, u32 offset, u8 buffer_id) { @@ -1569,38 +1584,31 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, unsigned length; memset(&c, 0, sizeof(c)); - c.common.opcode = opcode; - if (opcode == nvme_admin_download_fw) { - if (hdr->iovec_count > 0) { - /* Assuming SGL is not allowed for this command */ - res = nvme_trans_completion(hdr, - SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, - SCSI_ASC_INVALID_CDB, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out; - } - iod = nvme_map_user_pages(dev, DMA_TO_DEVICE, - (unsigned long)hdr->dxferp, tot_len); - if (IS_ERR(iod)) { - res = PTR_ERR(iod); - goto out; - } - length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL); - if (length != tot_len) { - res = -ENOMEM; - goto out_unmap; - } + c.common.opcode = nvme_admin_download_fw; - c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.dlfw.prp2 = 
cpu_to_le64(iod->first_dma); - c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); - c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); - } else if (opcode == nvme_admin_activate_fw) { - u32 cdw10 = buffer_id | NVME_FWACT_REPL_ACTV; - c.common.cdw10[0] = cpu_to_le32(cdw10); + if (hdr->iovec_count > 0) { + /* Assuming SGL is not allowed for this command */ + return nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + } + iod = nvme_map_user_pages(dev, DMA_TO_DEVICE, + (unsigned long)hdr->dxferp, tot_len); + if (IS_ERR(iod)) + return PTR_ERR(iod); + length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL); + if (length != tot_len) { + res = -ENOMEM; + goto out_unmap; } + c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + c.dlfw.prp2 = cpu_to_le64(iod->first_dma); + c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); + c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); + nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) @@ -1609,11 +1617,8 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_sc; out_unmap: - if (opcode == nvme_admin_download_fw) { - nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod); - nvme_free_iod(dev, iod); - } - out: + nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod); + nvme_free_iod(dev, iod); return res; } @@ -2769,7 +2774,7 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, } /* Attempt to activate any previously downloaded firmware image */ - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, 0, 0, 0); + res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0); /* Determine Block size and count and send format command */ res = nvme_trans_fmt_set_blk_size_count(ns, hdr); @@ -2829,24 +2834,20 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, switch (mode) { case DOWNLOAD_SAVE_ACTIVATE: - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw, + res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw, parm_list_length, buffer_offset, buffer_id); if (res != SNTI_TRANSLATION_SUCCESS) goto out; - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, - parm_list_length, buffer_offset, - buffer_id); + res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id); break; case DOWNLOAD_SAVE_DEFER_ACTIVATE: - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw, + res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw, parm_list_length, buffer_offset, buffer_id); break; case ACTIVATE_DEFERRED_MICROCODE: - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, - parm_list_length, buffer_offset, - buffer_id); + res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id); break; default: res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, -- cgit v0.10.2 From e61b0a86cac83c3bf501705c8d52a0a29cecf091 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:41 +0200 Subject: nvme: fix scsi translation error handling Erorr handling for the scsi translation was completely broken, as there were two different positive error number spaces overlapping. Fix this up by removing one of them, and centralizing the generation of the other positive values in a single place. Also fix up a few places that didn't handle the NVMe error codes properly. 
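The net effect of the cleanup is one simple convention: the low-level helpers return either a negative Linux errno or a positive NVMe status code (0 on success), and nvme_trans_status_code() is the single place that turns a positive status into SCSI sense data while handing the value back to the caller. A minimal sketch of the resulting calling pattern, modelled on nvme_trans_synchronize_cache() in the diff below (not the literal driver code):

#include <linux/nvme.h>
#include <linux/string.h>
#include <scsi/sg.h>

static int example_flush_translation(struct nvme_ns *ns, struct sg_io_hdr *hdr)
{
	struct nvme_command c;
	int nvme_sc, res;

	memset(&c, 0, sizeof(c));
	c.common.opcode = nvme_cmd_flush;
	c.common.nsid = cpu_to_le32(ns->ns_id);

	/* < 0: Linux errno, > 0: NVMe status code, 0: success */
	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);

	/*
	 * Builds SCSI sense data for a positive NVMe status and returns the
	 * value unchanged, unless copying the sense buffer out fails (-EFAULT).
	 */
	res = nvme_trans_status_code(hdr, nvme_sc);
	if (res)
		return res;

	/* success: continue with the rest of the translation */
	return 0;
}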
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 60415b5..390c46d 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -47,9 +47,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ -#define SNTI_TRANSLATION_SUCCESS 0 -#define SNTI_INTERNAL_ERROR 1 - /* VPD Page Codes */ #define VPD_SUPPORTED_PAGES 0x00 #define VPD_SERIAL_NUMBER 0x80 @@ -369,8 +366,6 @@ struct nvme_trans_io_cdb { static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from, unsigned long n) { - int res = SNTI_TRANSLATION_SUCCESS; - unsigned long not_copied; int i; void *index = from; size_t remaining = n; @@ -380,29 +375,25 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from, struct sg_iovec sgl; for (i = 0; i < hdr->iovec_count; i++) { - not_copied = copy_from_user(&sgl, hdr->dxferp + + if (copy_from_user(&sgl, hdr->dxferp + i * sizeof(struct sg_iovec), - sizeof(struct sg_iovec)); - if (not_copied) + sizeof(struct sg_iovec))) return -EFAULT; xfer_len = min(remaining, sgl.iov_len); - not_copied = copy_to_user(sgl.iov_base, index, - xfer_len); - if (not_copied) { - res = -EFAULT; - break; - } + if (copy_to_user(sgl.iov_base, index, xfer_len)) + return -EFAULT; + index += xfer_len; remaining -= xfer_len; if (remaining == 0) break; } - return res; + return 0; } - not_copied = copy_to_user(hdr->dxferp, from, n); - if (not_copied) - res = -EFAULT; - return res; + + if (copy_to_user(hdr->dxferp, from, n)) + return -EFAULT; + return 0; } /* Copy data from userspace memory */ @@ -410,8 +401,6 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from, static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to, unsigned long n) { - int res = SNTI_TRANSLATION_SUCCESS; - unsigned long not_copied; int i; void *index = to; size_t remaining = n; @@ -421,30 +410,24 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to, struct sg_iovec sgl; for (i = 0; i < hdr->iovec_count; i++) { - not_copied = copy_from_user(&sgl, hdr->dxferp + + if (copy_from_user(&sgl, hdr->dxferp + i * sizeof(struct sg_iovec), - sizeof(struct sg_iovec)); - if (not_copied) + sizeof(struct sg_iovec))) return -EFAULT; xfer_len = min(remaining, sgl.iov_len); - not_copied = copy_from_user(index, sgl.iov_base, - xfer_len); - if (not_copied) { - res = -EFAULT; - break; - } + if (copy_from_user(index, sgl.iov_base, xfer_len)) + return -EFAULT; index += xfer_len; remaining -= xfer_len; if (remaining == 0) break; } - return res; + return 0; } - not_copied = copy_from_user(to, hdr->dxferp, n); - if (not_copied) - res = -EFAULT; - return res; + if (copy_from_user(to, hdr->dxferp, n)) + return -EFAULT; + return 0; } /* Status/Sense Buffer Writeback */ @@ -452,7 +435,6 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to, static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key, u8 asc, u8 ascq) { - int res = SNTI_TRANSLATION_SUCCESS; u8 xfer_len; u8 resp[DESC_FMT_SENSE_DATA_SIZE]; @@ -477,25 +459,29 @@ static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key, xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE); hdr->sb_len_wr = xfer_len; if (copy_to_user(hdr->sbp, resp, xfer_len) > 0) - res = -EFAULT; + return -EFAULT; } - return res; + return 0; } +/* + * Take a status code from a lowlevel routine, and if it was a positive NVMe + * error code update the sense data based on it. 
In either case the passed + * in value is returned again, unless an -EFAULT from copy_to_user overrides + * it. + */ static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc) { u8 status, sense_key, asc, ascq; - int res = SNTI_TRANSLATION_SUCCESS; + int res; /* For non-nvme (Linux) errors, simply return the error code */ if (nvme_sc < 0) return nvme_sc; /* Mask DNR, More, and reserved fields */ - nvme_sc &= 0x7FF; - - switch (nvme_sc) { + switch (nvme_sc & 0x7FF) { /* Generic Command Status */ case NVME_SC_SUCCESS: status = SAM_STAT_GOOD; @@ -662,8 +648,7 @@ static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc) } res = nvme_trans_completion(hdr, status, sense_key, asc, ascq); - - return res; + return res ? res : nvme_sc; } /* INQUIRY Helper Functions */ @@ -676,7 +661,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, dma_addr_t dma_addr; void *mem; struct nvme_id_ns *id_ns; - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; int xfer_len; u8 resp_data_format = 0x02; @@ -694,19 +679,9 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, /* nvme ns identify - use DPS value for PROTECT field */ nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); res = nvme_trans_status_code(hdr, nvme_sc); - /* - * If nvme_sc was -ve, res will be -ve here. - * If nvme_sc was +ve, the status would bace been translated, and res - * can only be 0 or -ve. - * - If 0 && nvme_sc > 0, then go into next if where res gets nvme_sc - * - If -ve, return because its a Linux error. - */ if (res) goto out_free; - if (nvme_sc) { - res = nvme_sc; - goto out_free; - } + id_ns = mem; (id_ns->dps) ? (protect = 0x01) : (protect = 0); @@ -737,7 +712,6 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len) { - int res = SNTI_TRANSLATION_SUCCESS; int xfer_len; memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); @@ -751,9 +725,7 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns, inq_response[9] = INQ_BDEV_LIMITS_PAGE; xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_unit_serial_page(struct nvme_ns *ns, @@ -761,7 +733,6 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns, int alloc_len) { struct nvme_dev *dev = ns->dev; - int res = SNTI_TRANSLATION_SUCCESS; int xfer_len; memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); @@ -770,9 +741,7 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns, strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH); xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, @@ -781,7 +750,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_dev *dev = ns->dev; dma_addr_t dma_addr; void *mem; - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; int xfer_len; __be32 tmp_id = cpu_to_be32(ns->ns_id); @@ -804,10 +773,6 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_free; - if (nvme_sc) { - res = nvme_sc; - goto out_free; - } if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) { if (bitmap_empty(eui, len * 8)) { @@ -859,7 
+824,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { u8 *inq_response; - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = ns->dev; dma_addr_t dma_addr; @@ -893,10 +858,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_free; - if (nvme_sc) { - res = nvme_sc; - goto out_free; - } + id_ns = mem; spt = spt_lut[(id_ns->dpc) & 0x07] << 3; (id_ns->dps) ? (protect = 0x01) : (protect = 0); @@ -909,10 +871,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_free; - if (nvme_sc) { - res = nvme_sc; - goto out_free; - } + id_ctrl = mem; v_sup = id_ctrl->vwc; @@ -961,7 +920,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { u8 *inq_response; - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); @@ -990,7 +949,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; u8 *log_response; @@ -1018,7 +977,7 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; u8 *log_response; struct nvme_command c; @@ -1084,7 +1043,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; u8 *log_response; struct nvme_command c; @@ -1168,7 +1127,7 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa, { /* Quick check to make sure I don't stomp on my own memory... 
*/ if ((cdb10 && len < 8) || (!cdb10 && len < 4)) - return SNTI_INTERNAL_ERROR; + return -EINVAL; if (cdb10) { resp[0] = (mode_data_length & 0xFF00) >> 8; @@ -1184,13 +1143,13 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa, resp[3] = (blk_desc_len & 0x00FF); } - return SNTI_TRANSLATION_SUCCESS; + return 0; } static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len, u8 llbaa) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = ns->dev; dma_addr_t dma_addr; @@ -1200,9 +1159,9 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, u32 lba_length; if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); @@ -1216,10 +1175,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } + id_ns = mem; flbas = (id_ns->flbas) & 0x0F; lba_length = (1 << (id_ns->lbaf[flbas].ds)); @@ -1251,7 +1207,7 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns, int len) { if (len < MODE_PAGE_CONTROL_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; resp[0] = MODE_PAGE_CONTROL; resp[1] = MODE_PAGE_CONTROL_LEN_FIELD; @@ -1265,78 +1221,69 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns, resp[9] = 0xFF; /* Bytes 10,11: Extended selftest completion time = 0x0000 */ - return SNTI_TRANSLATION_SUCCESS; + return 0; } static int nvme_trans_fill_caching_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; int nvme_sc; struct nvme_dev *dev = ns->dev; u32 feature_resp; u8 vwc; if (len < MODE_PAGE_CACHING_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0, &feature_resp); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out; - if (nvme_sc) { - res = nvme_sc; - goto out; - } + return res; + vwc = feature_resp & 0x00000001; resp[0] = MODE_PAGE_CACHING; resp[1] = MODE_PAGE_CACHING_LEN_FIELD; resp[2] = vwc << 2; - - out: - return res; + return 0; } static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len) { - int res = SNTI_TRANSLATION_SUCCESS; - if (len < MODE_PAGE_POW_CND_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; resp[0] = MODE_PAGE_POWER_CONDITION; resp[1] = MODE_PAGE_POW_CND_LEN_FIELD; /* All other bytes are zero */ - return res; + return 0; } static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len) { - int res = SNTI_TRANSLATION_SUCCESS; - if (len < MODE_PAGE_INF_EXC_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; resp[0] = MODE_PAGE_INFO_EXCEP; resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD; resp[2] = 0x88; /* All other bytes are zero */ - return res; + return 0; } static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u16 mode_pages_offset_1 = 0; u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4; @@ -1346,23 +1293,18 @@ static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_fill_caching_page(ns, hdr, 
&resp[mode_pages_offset_1], MODE_PAGE_CACHING_LEN); - if (res != SNTI_TRANSLATION_SUCCESS) - goto out; + if (res) + return res; res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2], MODE_PAGE_CONTROL_LEN); - if (res != SNTI_TRANSLATION_SUCCESS) - goto out; + if (res) + return res; res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3], MODE_PAGE_POW_CND_LEN); - if (res != SNTI_TRANSLATION_SUCCESS) - goto out; - res = nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4], + if (res) + return res; + return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4], MODE_PAGE_INF_EXC_LEN); - if (res != SNTI_TRANSLATION_SUCCESS) - goto out; - - out: - return res; } static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa) @@ -1383,7 +1325,7 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *, int), u16 mode_pages_tot_len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; u8 *response; u8 dbd, llbaa; @@ -1412,18 +1354,18 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns, res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10, llbaa, mode_data_length, blk_desc_len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_free; if (blk_desc_len > 0) { res = nvme_trans_fill_blk_desc(ns, hdr, &response[blk_desc_offset], blk_desc_len, llbaa); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_free; } res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1], mode_pages_tot_len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_free; xfer_len = min(alloc_len, resp_size); @@ -1478,7 +1420,7 @@ static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns, static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 pc, u8 pcmod, u8 start) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = ns->dev; dma_addr_t dma_addr; @@ -1498,10 +1440,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } + id_ctrl = mem; lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); @@ -1544,10 +1483,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0, NULL); res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - goto out_dma; - if (nvme_sc) - res = nvme_sc; + out_dma: dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); out: @@ -1559,24 +1495,20 @@ static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr { struct nvme_command c; int nvme_sc; - int res; memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_activate_fw; c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV); nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - return res; - return nvme_sc; + return nvme_trans_status_code(hdr, nvme_sc); } static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 opcode, u32 tot_len, u32 offset, u8 buffer_id) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = ns->dev; struct nvme_command c; @@ -1611,10 +1543,6 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); res = nvme_trans_status_code(hdr, 
nvme_sc); - if (res) - goto out_unmap; - if (nvme_sc) - res = nvme_sc; out_unmap: nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod); @@ -1682,7 +1610,7 @@ static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list, static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *mode_page, u8 page_code) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; int nvme_sc; struct nvme_dev *dev = ns->dev; unsigned dword11; @@ -1693,12 +1621,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11, 0, NULL); res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - break; - if (nvme_sc) { - res = nvme_sc; - break; - } break; case MODE_PAGE_CONTROL: break; @@ -1710,8 +1632,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - if (!res) - res = SNTI_INTERNAL_ERROR; break; } break; @@ -1719,8 +1639,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - if (!res) - res = SNTI_INTERNAL_ERROR; break; } @@ -1731,7 +1649,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd, u16 parm_list_len, u8 pf, u8 sp, u8 cdb10) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u8 *parm_list; u16 bd_len; u8 llbaa = 0; @@ -1747,7 +1665,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, } res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_mem; nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa); @@ -1785,7 +1703,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, mp_size = parm_list[index + 1] + 2; res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index], page_code); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) break; index += mp_size; } while (index < parm_list_len); @@ -1801,7 +1719,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, struct sg_io_hdr *hdr) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; int nvme_sc; struct nvme_dev *dev = ns->dev; dma_addr_t dma_addr; @@ -1828,10 +1746,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } + id_ns = mem; if (ns->mode_select_num_blocks == 0) @@ -1852,7 +1767,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, u8 format_prot_info, u8 *nvme_pf_code) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u8 *parm_list; u8 pf_usage, pf_code; @@ -1862,7 +1777,7 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, goto out; } res = nvme_trans_copy_from_user(hdr, parm_list, len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_mem; if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] & @@ -1912,7 +1827,7 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 prot_info) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = 
ns->dev; dma_addr_t dma_addr; @@ -1936,10 +1851,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } + id_ns = mem; flbas = (id_ns->flbas) & 0x0F; nlbaf = id_ns->nlbaf; @@ -1970,10 +1882,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - goto out_dma; - if (nvme_sc) - res = nvme_sc; out_dma: dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); @@ -2059,8 +1967,7 @@ static u16 nvme_trans_io_get_control(struct nvme_ns *ns, static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_trans_io_cdb *cdb_info, u8 is_write) { - int res = SNTI_TRANSLATION_SUCCESS; - int nvme_sc; + int nvme_sc = NVME_SC_SUCCESS; struct nvme_dev *dev = ns->dev; u32 num_cmds; struct nvme_iod *iod; @@ -2117,18 +2024,16 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, iod = nvme_map_user_pages(dev, (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, (unsigned long)next_mapping_addr, unit_len); - if (IS_ERR(iod)) { - res = PTR_ERR(iod); - goto out; - } + if (IS_ERR(iod)) + return PTR_ERR(iod); + retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL); if (retcode != unit_len) { nvme_unmap_user_pages(dev, (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, iod); nvme_free_iod(dev, iod); - res = -ENOMEM; - goto out; + return -ENOMEM; } c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); c.rw.prp2 = cpu_to_le64(iod->first_dma); @@ -2136,23 +2041,18 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_offset += unit_num_blocks; nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - if (nvme_sc != NVME_SC_SUCCESS) { - nvme_unmap_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - iod); - nvme_free_iod(dev, iod); - res = nvme_trans_status_code(hdr, nvme_sc); - goto out; - } + nvme_unmap_user_pages(dev, (is_write) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE, iod); nvme_free_iod(dev, iod); + + + if (nvme_sc != NVME_SC_SUCCESS) + break; } - res = nvme_trans_status_code(hdr, NVME_SC_SUCCESS); - out: - return res; + return nvme_trans_status_code(hdr, nvme_sc); } @@ -2161,7 +2061,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; struct nvme_trans_io_cdb cdb_info; u8 opcode = cmd[0]; u64 xfer_bytes; @@ -2190,7 +2090,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, break; default: /* Will never really reach here */ - res = SNTI_INTERNAL_ERROR; + res = -EIO; goto out; } @@ -2232,7 +2132,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, /* Send NVMe IO Command(s) */ res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; out: @@ -2242,7 +2142,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; u8 evpd; u8 page_code; int alloc_len; @@ -2310,7 +2210,7 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u16 alloc_len; u8 sp; u8 pc; @@ -2357,7 +2257,6 @@ static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; u8 cdb10 = 0; u16 parm_list_len; u8 page_format; @@ -2383,17 +2282,17 @@ static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr, * According to SPC-4 r24, a paramter list length field of 0 * shall not be considered an error */ - res = nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len, + return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len, page_format, save_pages, cdb10); } - return res; + return 0; } static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; u16 alloc_len; u8 cdb10 = 0; u8 page_code; @@ -2463,7 +2362,7 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; u32 alloc_len = READ_CAP_10_RESP_SIZE; u32 resp_size = READ_CAP_10_RESP_SIZE; @@ -2492,10 +2391,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } + id_ns = mem; response = kzalloc(resp_size, GFP_KERNEL); @@ -2518,7 +2414,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; u32 alloc_len, xfer_len, resp_size; u8 select_report; @@ -2553,10 +2449,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } + id_ctrl = mem; 
ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE; resp_size = ll_length + LUN_DATA_HEADER_SIZE; @@ -2602,7 +2495,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u8 alloc_len, xfer_len, resp_size; u8 desc_format; u8 *response; @@ -2661,7 +2554,7 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns, static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_command c; u8 immed, pcmod, pc, no_flush, start; @@ -2679,7 +2572,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, start &= START_STOP_UNIT_CDB_START_MASK; if (immed != 0) { - res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); } else { @@ -2692,24 +2585,16 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out; - if (nvme_sc) { - res = nvme_sc; - goto out; - } + return res; } /* Setup the expected power state transition */ - res = nvme_trans_power_state(ns, hdr, pc, pcmod, start); + return nvme_trans_power_state(ns, hdr, pc, pcmod, start); } - - out: - return res; } static int nvme_trans_synchronize_cache(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; struct nvme_command c; @@ -2718,20 +2603,13 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, c.common.nsid = cpu_to_le32(ns->ns_id); nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - goto out; - if (nvme_sc) - res = nvme_sc; - - out: - return res; + return nvme_trans_status_code(hdr, nvme_sc); } static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u8 parm_hdr_len = 0; u8 nvme_pf_code = 0; u8 format_prot_info, long_list, format_data; @@ -2769,7 +2647,7 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, if (parm_hdr_len > 0) { res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len, format_prot_info, &nvme_pf_code); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; } @@ -2778,7 +2656,7 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, /* Determine Block size and count and send format command */ res = nvme_trans_fmt_set_blk_size_count(ns, hdr); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code); @@ -2791,23 +2669,20 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; struct nvme_dev *dev = ns->dev; if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) - res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, NOT_READY, SCSI_ASC_LUN_NOT_READY, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); else - res = nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0); - - return res; + return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0); } static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = 
SNTI_TRANSLATION_SUCCESS; + int res = 0; u32 buffer_offset, parm_list_length; u8 buffer_id, mode; @@ -2837,7 +2712,7 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw, parm_list_length, buffer_offset, buffer_id); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id); break; @@ -2893,7 +2768,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, return -ENOMEM; res = nvme_trans_copy_from_user(hdr, plist, list_len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4; @@ -3038,15 +2913,16 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr) if (hdr.cmd_len > BLK_MAX_CDB) return -EINVAL; + /* + * A positive return code means a NVMe status, which has been + * translated to sense data. + */ retcode = nvme_scsi_translate(ns, &hdr); if (retcode < 0) return retcode; - if (retcode > 0) - retcode = SNTI_TRANSLATION_SUCCESS; if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0) return -EFAULT; - - return retcode; + return 0; } int nvme_sg_get_version_num(int __user *ip) -- cgit v0.10.2 From 3726897efde1d7a43b6f966ab81b0c143a176556 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:42 +0200 Subject: nvme: first round at deobfuscating the SCSI translation code Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 390c46d..9fe0a2c 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -55,49 +56,14 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define VPD_BLOCK_LIMITS 0xB0 #define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1 -/* CDB offsets */ -#define REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET 6 -#define REPORT_LUNS_SR_OFFSET 2 -#define READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET 10 -#define REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET 4 -#define REQUEST_SENSE_DESC_OFFSET 1 -#define REQUEST_SENSE_DESC_MASK 0x01 -#define DESCRIPTOR_FORMAT_SENSE_DATA_TYPE 1 -#define INQUIRY_EVPD_BYTE_OFFSET 1 -#define INQUIRY_PAGE_CODE_BYTE_OFFSET 2 -#define INQUIRY_EVPD_BIT_MASK 1 -#define INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET 3 -#define START_STOP_UNIT_CDB_IMMED_OFFSET 1 -#define START_STOP_UNIT_CDB_IMMED_MASK 0x1 -#define START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET 3 -#define START_STOP_UNIT_CDB_POWER_COND_MOD_MASK 0xF -#define START_STOP_UNIT_CDB_POWER_COND_OFFSET 4 -#define START_STOP_UNIT_CDB_POWER_COND_MASK 0xF0 -#define START_STOP_UNIT_CDB_NO_FLUSH_OFFSET 4 -#define START_STOP_UNIT_CDB_NO_FLUSH_MASK 0x4 -#define START_STOP_UNIT_CDB_START_OFFSET 4 -#define START_STOP_UNIT_CDB_START_MASK 0x1 -#define WRITE_BUFFER_CDB_MODE_OFFSET 1 -#define WRITE_BUFFER_CDB_MODE_MASK 0x1F -#define WRITE_BUFFER_CDB_BUFFER_ID_OFFSET 2 -#define WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET 3 -#define WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET 6 -#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET 1 -#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK 0xC0 -#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT 6 -#define FORMAT_UNIT_CDB_LONG_LIST_OFFSET 1 -#define FORMAT_UNIT_CDB_LONG_LIST_MASK 0x20 -#define FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET 1 -#define FORMAT_UNIT_CDB_FORMAT_DATA_MASK 0x10 +/* format unit paramter list offsets */ #define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4 #define FORMAT_UNIT_LONG_PARM_LIST_LEN 8
#define FORMAT_UNIT_PROT_INT_OFFSET 3 #define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0 #define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07 -#define UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET 7 /* Misc. defines */ -#define NIBBLE_SHIFT 4 #define FIXED_SENSE_DATA 0x70 #define DESC_FORMAT_SENSE_DATA 0x72 #define FIXED_SENSE_DATA_ADD_LENGTH 10 @@ -145,22 +111,7 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define IO_CDB_WP_MASK 0xE0 #define IO_CDB_WP_SHIFT 5 #define IO_CDB_FUA_MASK 0x8 -#define IO_6_CDB_LBA_OFFSET 0 #define IO_6_CDB_LBA_MASK 0x001FFFFF -#define IO_6_CDB_TX_LEN_OFFSET 4 -#define IO_6_DEFAULT_TX_LEN 256 -#define IO_10_CDB_LBA_OFFSET 2 -#define IO_10_CDB_TX_LEN_OFFSET 7 -#define IO_10_CDB_WP_OFFSET 1 -#define IO_10_CDB_FUA_OFFSET 1 -#define IO_12_CDB_LBA_OFFSET 2 -#define IO_12_CDB_TX_LEN_OFFSET 6 -#define IO_12_CDB_WP_OFFSET 1 -#define IO_12_CDB_FUA_OFFSET 1 -#define IO_16_CDB_FUA_OFFSET 1 -#define IO_16_CDB_WP_OFFSET 1 -#define IO_16_CDB_LBA_OFFSET 2 -#define IO_16_CDB_TX_LEN_OFFSET 10 /* Mode Sense/Select defines */ #define MODE_PAGE_INFO_EXCEP 0x1C @@ -176,23 +127,14 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define MODE_PAGE_INF_EXC_LEN 0x0C #define MODE_PAGE_ALL_LEN 0x54 #define MODE_SENSE6_MPH_SIZE 4 -#define MODE_SENSE6_ALLOC_LEN_OFFSET 4 -#define MODE_SENSE_PAGE_CONTROL_OFFSET 2 #define MODE_SENSE_PAGE_CONTROL_MASK 0xC0 #define MODE_SENSE_PAGE_CODE_OFFSET 2 #define MODE_SENSE_PAGE_CODE_MASK 0x3F -#define MODE_SENSE_LLBAA_OFFSET 1 #define MODE_SENSE_LLBAA_MASK 0x10 #define MODE_SENSE_LLBAA_SHIFT 4 -#define MODE_SENSE_DBD_OFFSET 1 #define MODE_SENSE_DBD_MASK 8 #define MODE_SENSE_DBD_SHIFT 3 #define MODE_SENSE10_MPH_SIZE 8 -#define MODE_SENSE10_ALLOC_LEN_OFFSET 7 -#define MODE_SELECT_CDB_PAGE_FORMAT_OFFSET 1 -#define MODE_SELECT_CDB_SAVE_PAGES_OFFSET 1 -#define MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET 4 -#define MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET 7 #define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10 #define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1 #define MODE_SELECT_6_BD_OFFSET 3 @@ -218,14 +160,11 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07 #define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F #define LOG_PAGE_TEMPERATURE_PAGE 0x0D -#define LOG_SENSE_CDB_SP_OFFSET 1 #define LOG_SENSE_CDB_SP_NOT_ENABLED 0 -#define LOG_SENSE_CDB_PC_OFFSET 2 #define LOG_SENSE_CDB_PC_MASK 0xC0 #define LOG_SENSE_CDB_PC_SHIFT 6 #define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1 #define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F -#define LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET 7 #define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8 #define LOG_INFO_EXCP_PAGE_LENGTH 0xC #define REMAINING_TEMP_PAGE_LENGTH 0xC @@ -275,77 +214,11 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08 #define SCSI_ASCQ_INVALID_LUN_ID 0x09 -/** - * DEVICE_SPECIFIC_PARAMETER in mode parameter header (see sbc2r16) to - * enable DPOFUA support type 0x10 value. 
- */ -#define DEVICE_SPECIFIC_PARAMETER 0 -#define VPD_ID_DESCRIPTOR_LENGTH sizeof(VPD_IDENTIFICATION_DESCRIPTOR) - -/* MACROs to extract information from CDBs */ - -#define GET_OPCODE(cdb) cdb[0] - -#define GET_U8_FROM_CDB(cdb, index) (cdb[index] << 0) - -#define GET_U16_FROM_CDB(cdb, index) ((cdb[index] << 8) | (cdb[index + 1] << 0)) - -#define GET_U24_FROM_CDB(cdb, index) ((cdb[index] << 16) | \ -(cdb[index + 1] << 8) | \ -(cdb[index + 2] << 0)) - -#define GET_U32_FROM_CDB(cdb, index) ((cdb[index] << 24) | \ -(cdb[index + 1] << 16) | \ -(cdb[index + 2] << 8) | \ -(cdb[index + 3] << 0)) - -#define GET_U64_FROM_CDB(cdb, index) ((((u64)cdb[index]) << 56) | \ -(((u64)cdb[index + 1]) << 48) | \ -(((u64)cdb[index + 2]) << 40) | \ -(((u64)cdb[index + 3]) << 32) | \ -(((u64)cdb[index + 4]) << 24) | \ -(((u64)cdb[index + 5]) << 16) | \ -(((u64)cdb[index + 6]) << 8) | \ -(((u64)cdb[index + 7]) << 0)) - -/* Inquiry Helper Macros */ -#define GET_INQ_EVPD_BIT(cdb) \ -((GET_U8_FROM_CDB(cdb, INQUIRY_EVPD_BYTE_OFFSET) & \ -INQUIRY_EVPD_BIT_MASK) ? 1 : 0) - -#define GET_INQ_PAGE_CODE(cdb) \ -(GET_U8_FROM_CDB(cdb, INQUIRY_PAGE_CODE_BYTE_OFFSET)) - -#define GET_INQ_ALLOC_LENGTH(cdb) \ -(GET_U16_FROM_CDB(cdb, INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET)) - -/* Report LUNs Helper Macros */ -#define GET_REPORT_LUNS_ALLOC_LENGTH(cdb) \ -(GET_U32_FROM_CDB(cdb, REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET)) - -/* Read Capacity Helper Macros */ -#define GET_READ_CAP_16_ALLOC_LENGTH(cdb) \ -(GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET)) - -#define IS_READ_CAP_16(cdb) \ -((cdb[0] == SERVICE_ACTION_IN_16 && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0) - -/* Request Sense Helper Macros */ -#define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb) \ -(GET_U8_FROM_CDB(cdb, REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET)) - -/* Mode Sense Helper Macros */ -#define GET_MODE_SENSE_DBD(cdb) \ -((GET_U8_FROM_CDB(cdb, MODE_SENSE_DBD_OFFSET) & MODE_SENSE_DBD_MASK) >> \ -MODE_SENSE_DBD_SHIFT) - -#define GET_MODE_SENSE_LLBAA(cdb) \ -((GET_U8_FROM_CDB(cdb, MODE_SENSE_LLBAA_OFFSET) & \ -MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT) - -#define GET_MODE_SENSE_MPH_SIZE(cdb10) \ -(cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE) - +/* copied from drivers/usb/gadget/function/storage_common.h */ +static inline u32 get_unaligned_be24(u8 *buf) +{ + return 0xffffff & (u32) get_unaligned_be32(buf - 1); +} /* Struct to gather data that needs to be extracted from a SCSI CDB. Not conforming to any particular CDB variant, but compatible with all. */ @@ -1334,9 +1207,10 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns, u16 mode_pages_offset_1; u16 blk_desc_len, blk_desc_offset, mode_data_length; - dbd = GET_MODE_SENSE_DBD(cmd); - llbaa = GET_MODE_SENSE_LLBAA(cmd); - mph_size = GET_MODE_SENSE_MPH_SIZE(cdb10); + dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT; + llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT; + mph_size = cdb10 ? 
MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE; + blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa); resp_size = mph_size + blk_desc_len + mode_pages_tot_len; @@ -1896,46 +1770,39 @@ static inline void nvme_trans_get_io_cdb6(u8 *cmd, { cdb_info->fua = 0; cdb_info->prot_info = 0; - cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_6_CDB_LBA_OFFSET) & - IO_6_CDB_LBA_MASK; - cdb_info->xfer_len = GET_U8_FROM_CDB(cmd, IO_6_CDB_TX_LEN_OFFSET); + cdb_info->lba = get_unaligned_be32(&cmd[0]) & IO_6_CDB_LBA_MASK; + cdb_info->xfer_len = cmd[4]; /* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */ if (cdb_info->xfer_len == 0) - cdb_info->xfer_len = IO_6_DEFAULT_TX_LEN; + cdb_info->xfer_len = 256; } static inline void nvme_trans_get_io_cdb10(u8 *cmd, struct nvme_trans_io_cdb *cdb_info) { - cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_10_CDB_FUA_OFFSET) & - IO_CDB_FUA_MASK; - cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_10_CDB_WP_OFFSET) & - IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_10_CDB_LBA_OFFSET); - cdb_info->xfer_len = GET_U16_FROM_CDB(cmd, IO_10_CDB_TX_LEN_OFFSET); + cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK; + cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; + cdb_info->lba = get_unaligned_be32(&cmd[2]); + cdb_info->xfer_len = get_unaligned_be16(&cmd[7]); } static inline void nvme_trans_get_io_cdb12(u8 *cmd, struct nvme_trans_io_cdb *cdb_info) { - cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_12_CDB_FUA_OFFSET) & - IO_CDB_FUA_MASK; - cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_12_CDB_WP_OFFSET) & - IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_12_CDB_LBA_OFFSET); - cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_12_CDB_TX_LEN_OFFSET); + cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK; + cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; + cdb_info->lba = get_unaligned_be32(&cmd[2]); + cdb_info->xfer_len = get_unaligned_be32(&cmd[6]); } static inline void nvme_trans_get_io_cdb16(u8 *cmd, struct nvme_trans_io_cdb *cdb_info) { - cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_16_CDB_FUA_OFFSET) & - IO_CDB_FUA_MASK; - cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_16_CDB_WP_OFFSET) & - IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = GET_U64_FROM_CDB(cmd, IO_16_CDB_LBA_OFFSET); - cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_16_CDB_TX_LEN_OFFSET); + cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK; + cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; + cdb_info->lba = get_unaligned_be64(&cmd[2]); + cdb_info->xfer_len = get_unaligned_be32(&cmd[10]); } static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr, @@ -2148,9 +2015,9 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len; u8 *inq_response; - evpd = GET_INQ_EVPD_BIT(cmd); - page_code = GET_INQ_PAGE_CODE(cmd); - alloc_len = GET_INQ_ALLOC_LENGTH(cmd); + evpd = cmd[1] & 0x01; + page_code = cmd[2]; + alloc_len = get_unaligned_be16(&cmd[3]); inq_response = kmalloc(alloc_len, GFP_KERNEL); if (inq_response == NULL) { @@ -2212,27 +2079,25 @@ static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, { int res; u16 alloc_len; - u8 sp; u8 pc; u8 page_code; - sp = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_SP_OFFSET); - if (sp != LOG_SENSE_CDB_SP_NOT_ENABLED) { + if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - pc = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_PC_OFFSET); - 
page_code = pc & LOG_SENSE_CDB_PAGE_CODE_MASK; - pc = (pc & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT; + + page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK; + pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT; if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - alloc_len = GET_U16_FROM_CDB(cmd, LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET); + alloc_len = get_unaligned_be16(&cmd[7]); switch (page_code) { case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE: res = nvme_trans_log_supp_pages(ns, hdr, alloc_len); @@ -2262,18 +2127,13 @@ static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 page_format; u8 save_pages; - page_format = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_PAGE_FORMAT_OFFSET); - page_format &= MODE_SELECT_CDB_PAGE_FORMAT_MASK; + page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK; + save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK; - save_pages = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_SAVE_PAGES_OFFSET); - save_pages &= MODE_SELECT_CDB_SAVE_PAGES_MASK; - - if (GET_OPCODE(cmd) == MODE_SELECT) { - parm_list_len = GET_U8_FROM_CDB(cmd, - MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET); + if (cmd[0] == MODE_SELECT) { + parm_list_len = cmd[4]; } else { - parm_list_len = GET_U16_FROM_CDB(cmd, - MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET); + parm_list_len = cmd[7]; cdb10 = 1; } @@ -2295,29 +2155,23 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res = 0; u16 alloc_len; u8 cdb10 = 0; - u8 page_code; - u8 pc; - if (GET_OPCODE(cmd) == MODE_SENSE) { - alloc_len = GET_U8_FROM_CDB(cmd, MODE_SENSE6_ALLOC_LEN_OFFSET); + if (cmd[0] == MODE_SENSE) { + alloc_len = cmd[4]; } else { - alloc_len = GET_U16_FROM_CDB(cmd, - MODE_SENSE10_ALLOC_LEN_OFFSET); + alloc_len = get_unaligned_be16(&cmd[7]); cdb10 = 1; } - pc = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CONTROL_OFFSET) & - MODE_SENSE_PAGE_CONTROL_MASK; - if (pc != MODE_SENSE_PC_CURRENT_VALUES) { + if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) != + MODE_SENSE_PC_CURRENT_VALUES) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - page_code = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CODE_OFFSET) & - MODE_SENSE_PAGE_CODE_MASK; - switch (page_code) { + switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) { case MODE_PAGE_CACHING: res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, cdb10, @@ -2360,24 +2214,25 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, } static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 *cmd) + u8 *cmd, u8 cdb16) { int res; int nvme_sc; - u32 alloc_len = READ_CAP_10_RESP_SIZE; - u32 resp_size = READ_CAP_10_RESP_SIZE; + u32 alloc_len; + u32 resp_size; u32 xfer_len; - u8 cdb16; struct nvme_dev *dev = ns->dev; dma_addr_t dma_addr; void *mem; struct nvme_id_ns *id_ns; u8 *response; - cdb16 = IS_READ_CAP_16(cmd); if (cdb16) { - alloc_len = GET_READ_CAP_16_ALLOC_LENGTH(cmd); + alloc_len = get_unaligned_be32(&cmd[10]); resp_size = READ_CAP_16_RESP_SIZE; + } else { + alloc_len = READ_CAP_10_RESP_SIZE; + resp_size = READ_CAP_10_RESP_SIZE; } mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), @@ -2417,7 +2272,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; u32 alloc_len, xfer_len, resp_size; - u8 select_report; u8 *response; struct 
nvme_dev *dev = ns->dev; dma_addr_t dma_addr; @@ -2427,17 +2281,14 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET; __be32 tmp_len; - alloc_len = GET_REPORT_LUNS_ALLOC_LENGTH(cmd); - select_report = GET_U8_FROM_CDB(cmd, REPORT_LUNS_SR_OFFSET); - - if ((select_report != ALL_LUNS_RETURNED) && - (select_report != ALL_WELL_KNOWN_LUNS_RETURNED) && - (select_report != RESTRICTED_LUNS_RETURNED)) { - res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + switch (cmd[2]) { + default: + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out; - } else { + case ALL_LUNS_RETURNED: + case ALL_WELL_KNOWN_LUNS_RETURNED: + case RESTRICTED_LUNS_RETURNED: /* NVMe Controller Identify */ mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), &dma_addr, GFP_KERNEL); @@ -2454,6 +2305,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE; resp_size = ll_length + LUN_DATA_HEADER_SIZE; + alloc_len = get_unaligned_be32(&cmd[6]); if (alloc_len < resp_size) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, @@ -2500,9 +2352,8 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 desc_format; u8 *response; - alloc_len = GET_REQUEST_SENSE_ALLOC_LENGTH(cmd); - desc_format = GET_U8_FROM_CDB(cmd, REQUEST_SENSE_DESC_OFFSET); - desc_format &= REQUEST_SENSE_DESC_MASK; + desc_format = cmd[1] & 0x01; + alloc_len = cmd[4]; resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) : (FIXED_FMT_SENSE_DATA_SIZE)); @@ -2512,7 +2363,7 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } - if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) { + if (desc_format) { /* Descriptor Format Sense Data */ response[0] = DESC_FORMAT_SENSE_DATA; response[1] = NO_SENSE; @@ -2559,17 +2410,11 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; u8 immed, pcmod, pc, no_flush, start; - immed = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_IMMED_OFFSET); - pcmod = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET); - pc = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_OFFSET); - no_flush = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_NO_FLUSH_OFFSET); - start = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_START_OFFSET); - - immed &= START_STOP_UNIT_CDB_IMMED_MASK; - pcmod &= START_STOP_UNIT_CDB_POWER_COND_MOD_MASK; - pc = (pc & START_STOP_UNIT_CDB_POWER_COND_MASK) >> NIBBLE_SHIFT; - no_flush &= START_STOP_UNIT_CDB_NO_FLUSH_MASK; - start &= START_STOP_UNIT_CDB_START_MASK; + immed = cmd[1] & 0x01; + pcmod = cmd[3] & 0x0f; + pc = (cmd[4] & 0xf0) >> 4; + no_flush = cmd[4] & 0x04; + start = cmd[4] & 0x01; if (immed != 0) { return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, @@ -2614,16 +2459,9 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 nvme_pf_code = 0; u8 format_prot_info, long_list, format_data; - format_prot_info = GET_U8_FROM_CDB(cmd, - FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET); - long_list = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_LONG_LIST_OFFSET); - format_data = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET); - - format_prot_info = (format_prot_info & - FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK) >> - FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT; - long_list &= FORMAT_UNIT_CDB_LONG_LIST_MASK; - format_data &= 
FORMAT_UNIT_CDB_FORMAT_DATA_MASK; + format_prot_info = (cmd[1] & 0xc0) >> 6; + long_list = cmd[1] & 0x20; + format_data = cmd[1] & 0x10; if (format_data != 0) { if (format_prot_info != 0) { @@ -2686,8 +2524,7 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, u32 buffer_offset, parm_list_length; u8 buffer_id, mode; - parm_list_length = - GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET); + parm_list_length = get_unaligned_be24(&cmd[6]); if (parm_list_length % BYTES_TO_DWORDS != 0) { /* NVMe expects Firmware file to be a whole number of DWORDS */ res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, @@ -2695,17 +2532,15 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - buffer_id = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_ID_OFFSET); + buffer_id = cmd[2]; if (buffer_id > NVME_MAX_FIRMWARE_SLOT) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - mode = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_MODE_OFFSET) & - WRITE_BUFFER_CDB_MODE_MASK; - buffer_offset = - GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET); + mode = cmd[1] & 0x1f; + buffer_offset = get_unaligned_be24(&cmd[3]); switch (mode) { case DOWNLOAD_SAVE_ACTIVATE: @@ -2759,7 +2594,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, u16 ndesc, list_len; dma_addr_t dma_addr; - list_len = GET_U16_FROM_CDB(cmd, UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET); + list_len = get_unaligned_be16(&cmd[7]); if (!list_len) return -EINVAL; @@ -2853,13 +2688,16 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) retcode = nvme_trans_mode_sense(ns, hdr, cmd); break; case READ_CAPACITY: - retcode = nvme_trans_read_capacity(ns, hdr, cmd); + retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0); break; case SERVICE_ACTION_IN_16: - if (IS_READ_CAP_16(cmd)) - retcode = nvme_trans_read_capacity(ns, hdr, cmd); - else + switch (cmd[1]) { + case SAI_READ_CAPACITY_16: + retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1); + break; + default: goto out; + } break; case REPORT_LUNS: retcode = nvme_trans_report_luns(ns, hdr, cmd); -- cgit v0.10.2 From cbbb7a2ec6001a0c15297c85184c9cc7fae5f11e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:43 +0200 Subject: nvme: simplify and cleanup the READ/WRITE SCSI CDB parsing code Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 9fe0a2c..b119143 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -107,12 +107,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C #define RESERVED_FIELD 0 -/* SCSI READ/WRITE Defines */ -#define IO_CDB_WP_MASK 0xE0 -#define IO_CDB_WP_SHIFT 5 -#define IO_CDB_FUA_MASK 0x8 -#define IO_6_CDB_LBA_MASK 0x001FFFFF - /* Mode Sense/Select defines */ #define MODE_PAGE_INFO_EXCEP 0x1C #define MODE_PAGE_CACHING 0x08 @@ -1763,48 +1757,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, return res; } -/* Read/Write Helper Functions */ - -static inline void nvme_trans_get_io_cdb6(u8 *cmd, - struct nvme_trans_io_cdb *cdb_info) -{ - cdb_info->fua = 0; - cdb_info->prot_info = 0; - cdb_info->lba = get_unaligned_be32(&cmd[0]) & IO_6_CDB_LBA_MASK; - cdb_info->xfer_len = cmd[4]; - - /* sbc3r27 sec 5.32 - TRANSFER LEN of 0 
implies a 256 Block transfer */ - if (cdb_info->xfer_len == 0) - cdb_info->xfer_len = 256; -} - -static inline void nvme_trans_get_io_cdb10(u8 *cmd, - struct nvme_trans_io_cdb *cdb_info) -{ - cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK; - cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = get_unaligned_be32(&cmd[2]); - cdb_info->xfer_len = get_unaligned_be16(&cmd[7]); -} - -static inline void nvme_trans_get_io_cdb12(u8 *cmd, - struct nvme_trans_io_cdb *cdb_info) -{ - cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK; - cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = get_unaligned_be32(&cmd[2]); - cdb_info->xfer_len = get_unaligned_be32(&cmd[6]); -} - -static inline void nvme_trans_get_io_cdb16(u8 *cmd, - struct nvme_trans_io_cdb *cdb_info) -{ - cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK; - cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = get_unaligned_be64(&cmd[2]); - cdb_info->xfer_len = get_unaligned_be32(&cmd[10]); -} - static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr, struct nvme_trans_io_cdb *cdb_info, u32 max_blocks) @@ -1929,7 +1881,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, u8 *cmd) { int res = 0; - struct nvme_trans_io_cdb cdb_info; + struct nvme_trans_io_cdb cdb_info = { 0, }; u8 opcode = cmd[0]; u64 xfer_bytes; u64 sum_iov_len = 0; @@ -1937,23 +1889,41 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, int i; size_t not_copied; - /* Extract Fields from CDB */ + /* + * The FUA and WPROTECT fields are not supported in 6-byte CDBs, + * but always in the same place for all others. + */ + switch (opcode) { + case WRITE_6: + case READ_6: + break; + default: + cdb_info.fua = cmd[1] & 0x8; + cdb_info.prot_info = (cmd[1] & 0xe0) >> 5; + } + switch (opcode) { case WRITE_6: case READ_6: - nvme_trans_get_io_cdb6(cmd, &cdb_info); + cdb_info.lba = get_unaligned_be24(&cmd[1]); + cdb_info.xfer_len = cmd[4]; + if (cdb_info.xfer_len == 0) + cdb_info.xfer_len = 256; break; case WRITE_10: case READ_10: - nvme_trans_get_io_cdb10(cmd, &cdb_info); + cdb_info.lba = get_unaligned_be32(&cmd[2]); + cdb_info.xfer_len = get_unaligned_be16(&cmd[7]); break; case WRITE_12: case READ_12: - nvme_trans_get_io_cdb12(cmd, &cdb_info); + cdb_info.lba = get_unaligned_be32(&cmd[2]); + cdb_info.xfer_len = get_unaligned_be32(&cmd[6]); break; case WRITE_16: case READ_16: - nvme_trans_get_io_cdb16(cmd, &cdb_info); + cdb_info.lba = get_unaligned_be64(&cmd[2]); + cdb_info.xfer_len = get_unaligned_be32(&cmd[10]); break; default: /* Will never really reach here */ -- cgit v0.10.2 From 908517684807f3b3d93893da78c7906f5ff2c49b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:44 +0200 Subject: nvme: report the DPOFUA in MODE_SENSE NVMe devices always support the FUA bit, and the SCSI translation accepts the DPO bit, which doesn't have much meaning for us.
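For reference, the flag being advertised is the DPOFUA bit (0x10) in the DEVICE-SPECIFIC PARAMETER byte of the mode parameter header, which sits at byte 2 of the 6-byte header and byte 3 of the 10-byte header. A minimal sketch of the idea (illustrative only; the macro and helper names below are made up, not part of the patch):

	#define MODE_PARM_DPOFUA	0x10	/* DPO and FUA supported */

	/* set the device-specific parameter byte of a MODE SENSE parameter header */
	static void nvme_trans_set_dpofua(u8 *resp, u8 cdb10)
	{
		resp[cdb10 ? 3 : 2] |= MODE_PARM_DPOFUA;
	}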
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index b119143..f53da60 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -999,14 +999,14 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa, if (cdb10) { resp[0] = (mode_data_length & 0xFF00) >> 8; resp[1] = (mode_data_length & 0x00FF); - /* resp[2] and [3] are zero */ + resp[3] = 0x10 /* DPOFUA */; resp[4] = llbaa; resp[5] = RESERVED_FIELD; resp[6] = (blk_desc_len & 0xFF00) >> 8; resp[7] = (blk_desc_len & 0x00FF); } else { resp[0] = (mode_data_length & 0x00FF); - /* resp[1] and [2] are zero */ + resp[2] = 0x10 /* DPOFUA */; resp[3] = (blk_desc_len & 0x00FF); } -- cgit v0.10.2 From 772ce43559e076730ddff5907fabcb3485545e38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:45 +0200 Subject: nvme: fail SCSI read/write command with unsupported protection bit Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index f53da60..342f5b7 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1900,6 +1900,13 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, default: cdb_info.fua = cmd[1] & 0x8; cdb_info.prot_info = (cmd[1] & 0xe0) >> 5; + if (cdb_info.prot_info && !ns->pi_type) { + return nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + } } switch (opcode) { -- cgit v0.10.2 From d29ec8241c10eacf59c23b3828a88dbae06e7e3f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:46 +0200 Subject: nvme: submit internal commands through the block layer Use block layer queues with an internal cmd_type to submit internally generated NVMe commands. This both simplifies the code a lot and allows for a better structure. For example, the LightNVM code can now construct commands without knowing the details of the underlying I/O descriptors. Or a future NVMe over network target could inject commands, and the SCSI translation and ioctl code could be reused for such a beast. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 870a926..03bd638 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -445,7 +445,7 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, (unsigned long) rq, gfp); } -void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) +static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) { const int last_prp = dev->page_size / 8 - 1; int i; @@ -605,7 +605,12 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, spin_unlock_irqrestore(req->q->queue_lock, flags); return; } - req->errors = nvme_error_status(status); + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { + req->sense_len = le32_to_cpup(&cqe->result); + req->errors = status; + } else { + req->errors = nvme_error_status(status); + } } else req->errors = 0; @@ -630,8 +635,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } /* length is in bytes. gfp flags indicates whether we may sleep.
*/ -int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, - gfp_t gfp) +static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, + int total_len, gfp_t gfp) { struct dma_pool *pool; int length = total_len; @@ -709,6 +714,23 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, return total_len; } +static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req, + struct nvme_iod *iod) +{ + struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; + + memcpy(cmnd, req->cmd, sizeof(struct nvme_command)); + cmnd->rw.command_id = req->tag; + if (req->nr_phys_segments) { + cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); + } + + if (++nvmeq->sq_tail == nvmeq->q_depth) + nvmeq->sq_tail = 0; + writel(nvmeq->sq_tail, nvmeq->q_db); +} + /* * We reuse the small pool to allocate the 16-byte range here as it is not * worth having a special pool for these or additional cases to handle freeing @@ -807,11 +829,15 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod, return 0; } +/* + * NOTE: ns is NULL when called on the admin queue. + */ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nvme_ns *ns = hctx->queue->queuedata; struct nvme_queue *nvmeq = hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); struct nvme_iod *iod; @@ -822,7 +848,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * unless this namespace is formated such that the metadata can be * stripped/generated by the controller with PRACT=1. */ - if (ns->ms && !blk_integrity_rq(req)) { + if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8)) { req->errors = -EFAULT; blk_mq_complete_request(req); @@ -830,7 +856,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } } - iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC); + iod = nvme_alloc_iod(req, dev, GFP_ATOMIC); if (!iod) return BLK_MQ_RQ_QUEUE_BUSY; @@ -841,8 +867,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * as it is not worth having a special pool for these or * additional cases to handle freeing the iod. 
*/ - range = dma_pool_alloc(nvmeq->dev->prp_small_pool, - GFP_ATOMIC, + range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &iod->first_dma); if (!range) goto retry_cmd; @@ -860,9 +885,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, goto retry_cmd; if (blk_rq_bytes(req) != - nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { - dma_unmap_sg(nvmeq->dev->dev, iod->sg, - iod->nents, dma_dir); + nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); goto retry_cmd; } if (blk_integrity_rq(req)) { @@ -884,7 +908,9 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, nvme_set_info(cmd, iod, req_completion); spin_lock_irq(&nvmeq->q_lock); - if (req->cmd_flags & REQ_DISCARD) + if (req->cmd_type == REQ_TYPE_DRV_PRIV) + nvme_submit_priv(nvmeq, req, iod); + else if (req->cmd_flags & REQ_DISCARD) nvme_submit_discard(nvmeq, ns, req, iod); else if (req->cmd_flags & REQ_FLUSH) nvme_submit_flush(nvmeq, ns, req->tag); @@ -896,10 +922,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; error_cmd: - nvme_free_iod(nvmeq->dev, iod); + nvme_free_iod(dev, iod); return BLK_MQ_RQ_QUEUE_ERROR; retry_cmd: - nvme_free_iod(nvmeq->dev, iod); + nvme_free_iod(dev, iod); return BLK_MQ_RQ_QUEUE_BUSY; } @@ -942,15 +968,6 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) return 1; } -/* Admin queue isn't initialized as a request queue. If at some point this - * happens anyway, make sure to notify the user */ -static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) -{ - WARN_ON_ONCE(1); - return BLK_MQ_RQ_QUEUE_ERROR; -} - static irqreturn_t nvme_irq(int irq, void *data) { irqreturn_t result; @@ -972,59 +989,61 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_WAKE_THREAD; } -struct sync_cmd_info { - struct task_struct *task; - u32 result; - int status; -}; - -static void sync_completion(struct nvme_queue *nvmeq, void *ctx, - struct nvme_completion *cqe) -{ - struct sync_cmd_info *cmdinfo = ctx; - cmdinfo->result = le32_to_cpup(&cqe->result); - cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; - wake_up_process(cmdinfo->task); -} - /* * Returns 0 on success. 
If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -static int __nvme_submit_sync_cmd(struct request_queue *q, - struct nvme_command *cmd, u32 *result, unsigned timeout) +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout) { - struct sync_cmd_info cmdinfo; - struct nvme_cmd_info *cmd_rq; + bool write = cmd->common.opcode & 1; + struct bio *bio = NULL; struct request *req; - int res; + int ret; - req = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false); + req = blk_mq_alloc_request(q, write, GFP_KERNEL, false); if (IS_ERR(req)) return PTR_ERR(req); - cmdinfo.task = current; - cmdinfo.status = -EINTR; + req->cmd_type = REQ_TYPE_DRV_PRIV; + req->__data_len = 0; + req->__sector = (sector_t) -1; + req->bio = req->biotail = NULL; - cmd->common.command_id = req->tag; + req->timeout = ADMIN_TIMEOUT; - cmd_rq = blk_mq_rq_to_pdu(req); - nvme_set_info(cmd_rq, &cmdinfo, sync_completion); + req->cmd = (unsigned char *)cmd; + req->cmd_len = sizeof(struct nvme_command); + req->sense = NULL; + req->sense_len = 0; - set_current_state(TASK_UNINTERRUPTIBLE); - nvme_submit_cmd(cmd_rq->nvmeq, cmd); - schedule(); + if (buffer && bufflen) { + ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); + if (ret) + goto out; + } else if (ubuffer && bufflen) { + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT); + if (ret) + goto out; + bio = req->bio; + } + blk_execute_rq(req->q, NULL, req, 0); + if (bio) + blk_rq_unmap_user(bio); if (result) - *result = cmdinfo.result; - res = cmdinfo.status; + *result = req->sense_len; + ret = req->errors; + out: blk_mq_free_request(req); - return res; + return ret; } -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd) +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, unsigned bufflen) { - return __nvme_submit_sync_cmd(q, cmd, NULL, 0); + return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0); } static int nvme_submit_async_admin_req(struct nvme_dev *dev) @@ -1081,7 +1100,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) c.delete_queue.opcode = opcode; c.delete_queue.qid = cpu_to_le16(id); - return nvme_submit_sync_cmd(dev->admin_q, &c); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, @@ -1090,6 +1109,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; + /* + * Note: we (ab)use the fact the the prp fields survive if no data + * is attached to the request. + */ memset(&c, 0, sizeof(c)); c.create_cq.opcode = nvme_admin_create_cq; c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr); @@ -1098,7 +1121,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cq_flags = cpu_to_le16(flags); c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); - return nvme_submit_sync_cmd(dev->admin_q, &c); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, @@ -1107,6 +1130,10 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + /* + * Note: we (ab)use the fact the the prp fields survive if no data + * is attached to the request. 
+ */ memset(&c, 0, sizeof(c)); c.create_sq.opcode = nvme_admin_create_sq; c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr); @@ -1115,7 +1142,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, c.create_sq.sq_flags = cpu_to_le16(flags); c.create_sq.cqid = cpu_to_le16(qid); - return nvme_submit_sync_cmd(dev->admin_q, &c); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) @@ -1128,18 +1155,43 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); } -int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, - dma_addr_t dma_addr) +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) { - struct nvme_command c; + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.cns = cpu_to_le32(1), + }; + int error; - memset(&c, 0, sizeof(c)); - c.identify.opcode = nvme_admin_identify; - c.identify.nsid = cpu_to_le32(nsid); - c.identify.prp1 = cpu_to_le64(dma_addr); - c.identify.cns = cpu_to_le32(cns); + *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); + if (!*id) + return -ENOMEM; - return nvme_submit_sync_cmd(dev->admin_q, &c); + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, + sizeof(struct nvme_id_ctrl)); + if (error) + kfree(*id); + return error; +} + +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id) +{ + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + }; + int error; + + *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, + sizeof(struct nvme_id_ns)); + if (error) + kfree(*id); + return error; } int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, @@ -1153,7 +1205,8 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0); + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0, + result, 0); } int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, @@ -1167,7 +1220,30 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0); + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0, + result, 0); +} + +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log) +{ + struct nvme_command c = { + .common.opcode = nvme_admin_get_log_page, + .common.nsid = cpu_to_le32(0xFFFFFFFF), + .common.cdw10[0] = cpu_to_le32( + (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | + NVME_LOG_SMART), + }; + int error; + + *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); + if (!*log) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *log, + sizeof(struct nvme_smart_log)); + if (error) + kfree(*log); + return error; } /** @@ -1523,7 +1599,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev) } static struct blk_mq_ops nvme_mq_admin_ops = { - .queue_rq = nvme_admin_queue_rq, + .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, .exit_hctx = nvme_exit_hctx, @@ -1644,122 +1720,41 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } -struct nvme_iod 
*nvme_map_user_pages(struct nvme_dev *dev, int write, - unsigned long addr, unsigned length) -{ - int i, err, count, nents, offset; - struct scatterlist *sg; - struct page **pages; - struct nvme_iod *iod; - - if (addr & 3) - return ERR_PTR(-EINVAL); - if (!length || length > INT_MAX - PAGE_SIZE) - return ERR_PTR(-EINVAL); - - offset = offset_in_page(addr); - count = DIV_ROUND_UP(offset + length, PAGE_SIZE); - pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); - if (!pages) - return ERR_PTR(-ENOMEM); - - err = get_user_pages_fast(addr, count, 1, pages); - if (err < count) { - count = err; - err = -EFAULT; - goto put_pages; - } - - err = -ENOMEM; - iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL); - if (!iod) - goto put_pages; - - sg = iod->sg; - sg_init_table(sg, count); - for (i = 0; i < count; i++) { - sg_set_page(&sg[i], pages[i], - min_t(unsigned, length, PAGE_SIZE - offset), - offset); - length -= (PAGE_SIZE - offset); - offset = 0; - } - sg_mark_end(&sg[i - 1]); - iod->nents = count; - - nents = dma_map_sg(dev->dev, sg, count, - write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (!nents) - goto free_iod; - - kfree(pages); - return iod; - - free_iod: - kfree(iod); - put_pages: - for (i = 0; i < count; i++) - put_page(pages[i]); - kfree(pages); - return ERR_PTR(err); -} - -void nvme_unmap_user_pages(struct nvme_dev *dev, int write, - struct nvme_iod *iod) -{ - int i; - - dma_unmap_sg(dev->dev, iod->sg, iod->nents, - write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - - for (i = 0; i < iod->nents; i++) - put_page(sg_page(&iod->sg[i])); -} - static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_dev *dev = ns->dev; struct nvme_user_io io; struct nvme_command c; - unsigned length, meta_len, prp_len; + unsigned length, meta_len; int status, write; - struct nvme_iod *iod; dma_addr_t meta_dma = 0; void *meta = NULL; if (copy_from_user(&io, uio, sizeof(io))) return -EFAULT; - length = (io.nblocks + 1) << ns->lba_shift; - meta_len = (io.nblocks + 1) * ns->ms; - - if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext) - return -EINVAL; - else if (meta_len && ns->ext) { - length += meta_len; - meta_len = 0; - } - - write = io.opcode & 1; switch (io.opcode) { case nvme_cmd_write: case nvme_cmd_read: case nvme_cmd_compare: - iod = nvme_map_user_pages(dev, write, io.addr, length); break; default: return -EINVAL; } - if (IS_ERR(iod)) - return PTR_ERR(iod); + length = (io.nblocks + 1) << ns->lba_shift; + meta_len = (io.nblocks + 1) * ns->ms; + write = io.opcode & 1; - prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL); - if (length != prp_len) { - status = -ENOMEM; - goto unmap; - } if (meta_len) { + if (((io.metadata & 3) || !io.metadata) && !ns->ext) + return -EINVAL; + + if (ns->ext) { + length += meta_len; + meta_len = 0; + } + meta = dma_alloc_coherent(dev->dev, meta_len, &meta_dma, GFP_KERNEL); if (!meta) { @@ -1786,13 +1781,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.reftag = cpu_to_le32(io.reftag); c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.rw.prp2 = cpu_to_le64(iod->first_dma); c.rw.metadata = cpu_to_le64(meta_dma); - status = nvme_submit_sync_cmd(ns->queue, &c); + + status = __nvme_submit_sync_cmd(ns->queue, &c, NULL, + (void __user *)io.addr, length, NULL, 0); unmap: - nvme_unmap_user_pages(dev, write, iod); - nvme_free_iod(dev, iod); if (meta) { if (status == NVME_SC_SUCCESS && !write) { if 
(copy_to_user((void __user *)io.metadata, meta, @@ -1809,9 +1802,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, { struct nvme_passthru_cmd cmd; struct nvme_command c; - int status, length; - struct nvme_iod *uninitialized_var(iod); - unsigned timeout; + unsigned timeout = 0; + int status; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -1831,38 +1823,17 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); - length = cmd.data_len; - if (cmd.data_len) { - iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr, - length); - if (IS_ERR(iod)) - return PTR_ERR(iod); - length = nvme_setup_prps(dev, iod, length, GFP_KERNEL); - c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.common.prp2 = cpu_to_le64(iod->first_dma); - } - - timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : - ADMIN_TIMEOUT; - - if (length != cmd.data_len) { - status = -ENOMEM; - goto out; - } + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c, - &cmd.result, timeout); - -out: - if (cmd.data_len) { - nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); - nvme_free_iod(dev, iod); + NULL, (void __user *)cmd.addr, cmd.data_len, + &cmd.result, timeout); + if (status >= 0) { + if (put_user(cmd.result, &ucmd->result)) + return -EFAULT; } - if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result, - sizeof(cmd.result))) - status = -EFAULT; - return status; } @@ -1954,22 +1925,14 @@ static int nvme_revalidate_disk(struct gendisk *disk) struct nvme_ns *ns = disk->private_data; struct nvme_dev *dev = ns->dev; struct nvme_id_ns *id; - dma_addr_t dma_addr; u8 lbaf, pi_type; u16 old_ms; unsigned short bs; - id = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL); - if (!id) { - dev_warn(dev->dev, "%s: Memory alocation failure\n", __func__); + if (nvme_identify_ns(dev, ns->ns_id, &id)) { + dev_warn(dev->dev, "%s: Identify failure\n", __func__); return 0; } - if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) { - dev_warn(dev->dev, - "identify failed ns:%d, setting capacity to 0\n", - ns->ns_id); - memset(id, 0, sizeof(*id)); - } old_ms = ns->ms; lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; @@ -2010,7 +1973,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (dev->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); - dma_free_coherent(dev->dev, 4096, id, dma_addr); + kfree(id); return 0; } @@ -2250,22 +2213,14 @@ static int nvme_dev_add(struct nvme_dev *dev) int res; unsigned nn, i; struct nvme_id_ctrl *ctrl; - void *mem; - dma_addr_t dma_addr; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; - mem = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL); - if (!mem) - return -ENOMEM; - - res = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_identify_ctrl(dev, &ctrl); if (res) { dev_err(dev->dev, "Identify Controller failed (%d)\n", res); - dma_free_coherent(dev->dev, 4096, mem, dma_addr); return -EIO; } - ctrl = mem; nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; @@ -2287,7 +2242,7 @@ static int nvme_dev_add(struct nvme_dev *dev) } else dev->max_hw_sectors = max_hw_sectors; } - dma_free_coherent(dev->dev, 4096, mem, dma_addr); + kfree(ctrl); dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 342f5b7..8e6223e 100644 --- 
a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -525,8 +525,6 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, int alloc_len) { struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; int res; int nvme_sc; @@ -536,21 +534,17 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, u8 cmdque = 0x01 << 1; u8 fw_offset = sizeof(dev->firmware_rev); - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } - /* nvme ns identify - use DPS value for PROTECT field */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; + return res; - id_ns = mem; - (id_ns->dps) ? (protect = 0x01) : (protect = 0); + if (id_ns->dps) + protect = 0x01; + else + protect = 0; + kfree(id_ns); memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); inq_response[2] = VERSION_SPC_4; @@ -567,12 +561,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4); xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - out_free: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out_dma: - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns, @@ -615,40 +604,35 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len) { struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; int res; int nvme_sc; int xfer_len; __be32 tmp_id = cpu_to_be32(ns->ns_id); - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } - memset(inq_response, 0, alloc_len); inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */ if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) { - struct nvme_id_ns *id_ns = mem; - void *eui = id_ns->eui64; - int len = sizeof(id_ns->eui64); + struct nvme_id_ns *id_ns; + void *eui; + int len; - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; + return res; + eui = id_ns->eui64; + len = sizeof(id_ns->eui64); if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) { if (bitmap_empty(eui, len * 8)) { eui = id_ns->nguid; len = sizeof(id_ns->nguid); } } - if (bitmap_empty(eui, len * 8)) + if (bitmap_empty(eui, len * 8)) { + kfree(id_ns); goto scsi_string; + } inq_response[3] = 4 + len; /* Page Length */ /* Designation Descriptor start */ @@ -657,14 +641,14 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, inq_response[6] = 0x00; /* Rsvd */ inq_response[7] = len; /* Designator Length */ memcpy(&inq_response[8], eui, len); + kfree(id_ns); } else { scsi_string: if (alloc_len < 72) { - res = nvme_trans_completion(hdr, + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out_free; } inq_response[3] = 0x48; /* Page Length */ /* Designation Descriptor start */ @@ -679,12 +663,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, memcpy(&inq_response[56], dev->serial, sizeof(dev->serial)); } 
xfer_len = alloc_len; - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - out_free: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out_dma: - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, @@ -694,8 +673,6 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; struct nvme_id_ns *id_ns; int xfer_len; @@ -708,39 +685,32 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 luiclr = 0x01; inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); - if (inq_response == NULL) { - res = -ENOMEM; - goto out_mem; - } - - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + if (inq_response == NULL) + return -ENOMEM; - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; + goto out_free_inq; + + spt = spt_lut[id_ns->dpc & 0x07] << 3; + if (id_ns->dps) + protect = 0x01; + else + protect = 0; + kfree(id_ns); - id_ns = mem; - spt = spt_lut[(id_ns->dpc) & 0x07] << 3; - (id_ns->dps) ? (protect = 0x01) : (protect = 0); grd_chk = protect << 2; app_chk = protect << 1; ref_chk = protect; - /* nvme controller identify */ - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; + goto out_free_inq; - id_ctrl = mem; v_sup = id_ctrl->vwc; + kfree(id_ctrl); memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */ @@ -756,11 +726,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - out_free: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out_dma: + out_free_inq: kfree(inq_response); - out_mem: return res; } @@ -847,43 +814,27 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, int res; int xfer_len; u8 *log_response; - struct nvme_command c; struct nvme_dev *dev = ns->dev; struct nvme_smart_log *smart_log; - dma_addr_t dma_addr; - void *mem; u8 temp_c; u16 temp_k; log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); - if (log_response == NULL) { - res = -ENOMEM; - goto out_mem; - } + if (log_response == NULL) + return -ENOMEM; - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + res = nvme_get_log_page(dev, &smart_log); + if (res < 0) + goto out_free_response; - /* Get SMART Log Page */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(0xFFFFFFFF); - c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_sync_cmd(dev->admin_q, &c); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; } else { - smart_log = mem; temp_k = (smart_log->temperature[1] << 8) + (smart_log->temperature[0]); temp_c = 
temp_k - KELVIN_TEMP_FACTOR; } + kfree(smart_log); log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE; /* Subpage=0x00, Page Length MSB=0 */ @@ -899,11 +850,8 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log), - mem, dma_addr); - out_dma: + out_free_response: kfree(log_response); - out_mem: return res; } @@ -913,44 +861,28 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int xfer_len; u8 *log_response; - struct nvme_command c; struct nvme_dev *dev = ns->dev; struct nvme_smart_log *smart_log; - dma_addr_t dma_addr; - void *mem; u32 feature_resp; u8 temp_c_cur, temp_c_thresh; u16 temp_k; log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); - if (log_response == NULL) { - res = -ENOMEM; - goto out_mem; - } + if (log_response == NULL) + return -ENOMEM; - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + res = nvme_get_log_page(dev, &smart_log); + if (res < 0) + goto out_free_response; - /* Get SMART Log Page */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(0xFFFFFFFF); - c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_sync_cmd(dev->admin_q, &c); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; } else { - smart_log = mem; temp_k = (smart_log->temperature[1] << 8) + (smart_log->temperature[0]); temp_c_cur = temp_k - KELVIN_TEMP_FACTOR; } + kfree(smart_log); /* Get Features for Temp Threshold */ res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0, @@ -979,11 +911,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log), - mem, dma_addr); - out_dma: + out_free_response: kfree(log_response); - out_mem: return res; } @@ -1019,8 +948,6 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 flbas; u32 lba_length; @@ -1030,20 +957,11 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) return -EINVAL; - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; + return res; - id_ns = mem; flbas = (id_ns->flbas) & 0x0F; lba_length = (1 << (id_ns->lbaf[flbas].ds)); @@ -1063,9 +981,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, memcpy(&resp[12], &tmp_len, sizeof(u32)); } - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out: + kfree(id_ns); return res; } @@ -1291,26 +1207,17 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; struct 
nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; int lowest_pow_st; /* max npss = lowest power consumption */ unsigned ps_desired = 0; - /* NVMe Controller Identify */ - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; + return res; - id_ctrl = mem; lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); + kfree(id_ctrl); switch (pc) { case NVME_POWER_STATE_START_VALID: @@ -1350,12 +1257,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, } nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0, NULL); - res = nvme_trans_status_code(hdr, nvme_sc); - - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); - out: - return res; + return nvme_trans_status_code(hdr, nvme_sc); } static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, @@ -1368,7 +1270,7 @@ static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr c.common.opcode = nvme_admin_activate_fw; c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV); - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0); return nvme_trans_status_code(hdr, nvme_sc); } @@ -1376,15 +1278,9 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr u8 opcode, u32 tot_len, u32 offset, u8 buffer_id) { - int res; int nvme_sc; struct nvme_dev *dev = ns->dev; struct nvme_command c; - struct nvme_iod *iod = NULL; - unsigned length; - - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_admin_download_fw; if (hdr->iovec_count > 0) { /* Assuming SGL is not allowed for this command */ @@ -1394,28 +1290,15 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); } - iod = nvme_map_user_pages(dev, DMA_TO_DEVICE, - (unsigned long)hdr->dxferp, tot_len); - if (IS_ERR(iod)) - return PTR_ERR(iod); - length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL); - if (length != tot_len) { - res = -ENOMEM; - goto out_unmap; - } - c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.dlfw.prp2 = cpu_to_le64(iod->first_dma); + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_download_fw; c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); - nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); - res = nvme_trans_status_code(hdr, nvme_sc); - - out_unmap: - nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod); - nvme_free_iod(dev, iod); - return res; + nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, + hdr->dxferp, tot_len, NULL, 0); + return nvme_trans_status_code(hdr, nvme_sc); } /* Mode Select Helper Functions */ @@ -1590,9 +1473,6 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, int res = 0; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; - struct nvme_id_ns *id_ns; u8 flbas; /* @@ -1603,19 +1483,12 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, */ if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { - mem = dma_alloc_coherent(dev->dev, - sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto 
out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + struct nvme_id_ns *id_ns; + + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - - id_ns = mem; + return res; if (ns->mode_select_num_blocks == 0) ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap); @@ -1624,12 +1497,11 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, ns->mode_select_block_len = (1 << (id_ns->lbaf[flbas].ds)); } - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), - mem, dma_addr); + + kfree(id_ns); } - out: - return res; + + return 0; } static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, @@ -1698,8 +1570,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 i; u8 flbas, nlbaf; @@ -1708,19 +1578,11 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; + return res; - id_ns = mem; flbas = (id_ns->flbas) & 0x0F; nlbaf = id_ns->nlbaf; @@ -1748,12 +1610,10 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.format.nsid = cpu_to_le32(ns->ns_id); c.format.cdw10 = cpu_to_le32(cdw10); - nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); + nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); res = nvme_trans_status_code(hdr, nvme_sc); - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out: + kfree(id_ns); return res; } @@ -1787,9 +1647,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_trans_io_cdb *cdb_info, u8 is_write) { int nvme_sc = NVME_SC_SUCCESS; - struct nvme_dev *dev = ns->dev; u32 num_cmds; - struct nvme_iod *iod; u64 unit_len; u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */ u32 retcode; @@ -1840,35 +1698,17 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, control = nvme_trans_io_get_control(ns, cdb_info); c.rw.control = cpu_to_le16(control); - iod = nvme_map_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - (unsigned long)next_mapping_addr, unit_len); - if (IS_ERR(iod)) - return PTR_ERR(iod); - - retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL); - if (retcode != unit_len) { - nvme_unmap_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - iod); - nvme_free_iod(dev, iod); - return -ENOMEM; + if (get_capacity(ns->disk) - unit_num_blocks < + cdb_info->lba + nvme_offset) { + nvme_sc = NVME_SC_LBA_RANGE; + break; } - c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.rw.prp2 = cpu_to_le64(iod->first_dma); + nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL, + next_mapping_addr, unit_len, NULL, 0); + if (nvme_sc) + break; nvme_offset += unit_num_blocks; - - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - - nvme_unmap_user_pages(dev, - (is_write) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE, - iod); - nvme_free_iod(dev, iod); - - - if (nvme_sc != NVME_SC_SUCCESS) - break; } return nvme_trans_status_code(hdr, nvme_sc); @@ -2199,8 +2039,6 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, u32 resp_size; u32 xfer_len; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 *response; @@ -2212,24 +2050,15 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, resp_size = READ_CAP_10_RESP_SIZE; } - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - - id_ns = mem; + return res; response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; - goto out_dma; + goto out_free_id; } nvme_trans_fill_read_cap(response, id_ns, cdb16); @@ -2237,9 +2066,8 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, response, xfer_len); kfree(response); - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out: + out_free_id: + kfree(id_ns); return res; } @@ -2251,8 +2079,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, u32 alloc_len, xfer_len, resp_size; u8 *response; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; u32 ll_length, lun_id; u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET; @@ -2266,19 +2092,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, case ALL_LUNS_RETURNED: case ALL_WELL_KNOWN_LUNS_RETURNED: case RESTRICTED_LUNS_RETURNED: - /* NVMe Controller Identify */ - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; + return res; - id_ctrl = mem; ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE; resp_size = ll_length + LUN_DATA_HEADER_SIZE; @@ -2288,13 +2106,13 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out_dma; + goto out_free_id; } response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; - goto out_dma; + goto out_free_id; } /* The first LUN ID will always be 0 per the SAM spec */ @@ -2315,9 +2133,8 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, response, xfer_len); kfree(response); - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); - out: + out_free_id: + kfree(id_ctrl); return res; } @@ -2379,12 +2196,23 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); } -static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 *cmd) +static int nvme_trans_synchronize_cache(struct nvme_ns *ns, + struct sg_io_hdr *hdr) { - int res; int nvme_sc; struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_cmd_flush; + c.common.nsid = 
cpu_to_le32(ns->ns_id); + + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0); + return nvme_trans_status_code(hdr, nvme_sc); +} + +static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ u8 immed, pcmod, pc, no_flush, start; immed = cmd[1] & 0x01; @@ -2400,12 +2228,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, } else { if (no_flush == 0) { /* Issue NVME FLUSH command prior to START STOP UNIT */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_cmd_flush; - c.common.nsid = cpu_to_le32(ns->ns_id); - - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - res = nvme_trans_status_code(hdr, nvme_sc); + int res = nvme_trans_synchronize_cache(ns, hdr); if (res) return res; } @@ -2414,20 +2237,6 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, } } -static int nvme_trans_synchronize_cache(struct nvme_ns *ns, - struct sg_io_hdr *hdr, u8 *cmd) -{ - int nvme_sc; - struct nvme_command c; - - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_cmd_flush; - c.common.nsid = cpu_to_le32(ns->ns_id); - - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - return nvme_trans_status_code(hdr, nvme_sc); -} - static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { @@ -2563,13 +2372,11 @@ struct scsi_unmap_parm_list { static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - struct nvme_dev *dev = ns->dev; struct scsi_unmap_parm_list *plist; struct nvme_dsm_range *range; struct nvme_command c; int i, nvme_sc, res = -ENOMEM; u16 ndesc, list_len; - dma_addr_t dma_addr; list_len = get_unaligned_be16(&cmd[7]); if (!list_len) @@ -2589,8 +2396,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } - range = dma_alloc_coherent(dev->dev, ndesc * sizeof(*range), - &dma_addr, GFP_KERNEL); + range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL); if (!range) goto out; @@ -2603,14 +2409,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, memset(&c, 0, sizeof(c)); c.dsm.opcode = nvme_cmd_dsm; c.dsm.nsid = cpu_to_le32(ns->ns_id); - c.dsm.prp1 = cpu_to_le64(dma_addr); c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range, + ndesc * sizeof(*range)); res = nvme_trans_status_code(hdr, nvme_sc); - dma_free_coherent(dev->dev, ndesc * sizeof(*range), range, dma_addr); + kfree(range); out: kfree(plist); return res; @@ -2690,7 +2496,7 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) retcode = nvme_trans_start_stop(ns, hdr, cmd); break; case SYNCHRONIZE_CACHE: - retcode = nvme_trans_synchronize_cache(ns, hdr, cmd); + retcode = nvme_trans_synchronize_cache(ns, hdr); break; case FORMAT_UNIT: retcode = nvme_trans_format_unit(ns, hdr, cmd); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index de0e49a..986bf8a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -146,21 +146,15 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) return (sector >> (ns->lba_shift - 9)); } -/** - * nvme_free_iod - frees an nvme_iod - * @dev: The device that the I/O was submitted to - * @iod: The memory to free - */ -void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); - -int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t); -struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, - unsigned long addr, unsigned 
length); -void nvme_unmap_user_pages(struct nvme_dev *dev, int write, - struct nvme_iod *iod); -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd); -int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, - dma_addr_t dma_addr); +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout); +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id); +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, dma_addr_t dma_addr, u32 *result); int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, -- cgit v0.10.2 From a0a931d6a2c1fbc5d5966ebf0e7a043748692c22 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 22 May 2015 12:28:31 -0600 Subject: NVMe: Fix obtaining command result Replaces req->sense_len usage, which is not owned by the LLD, to req->special to contain the command result for driver created commands, and sets the result unconditionally on completion. Signed-off-by: Keith Busch Cc: Christoph Hellwig Cc: Jens Axboe Fixes: d29ec8241c10 ("nvme: submit internal commands through the block layer") Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 03bd638..c42bc53 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -606,13 +606,16 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, return; } if (req->cmd_type == REQ_TYPE_DRV_PRIV) { - req->sense_len = le32_to_cpup(&cqe->result); req->errors = status; } else { req->errors = nvme_error_status(status); } } else req->errors = 0; + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { + u32 result = le32_to_cpup(&cqe->result); + req->special = (void *)(uintptr_t)result; + } if (cmd_rq->aborted) dev_warn(nvmeq->dev->dev, @@ -1015,8 +1018,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, req->cmd = (unsigned char *)cmd; req->cmd_len = sizeof(struct nvme_command); - req->sense = NULL; - req->sense_len = 0; + req->special = (void *)0; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); @@ -1033,7 +1035,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, if (bio) blk_rq_unmap_user(bio); if (result) - *result = req->sense_len; + *result = (u32)(uintptr_t)req->special; ret = req->errors; out: blk_mq_free_request(req); -- cgit v0.10.2 From f4ff414aeb472397d3b4fc15c22ca65bab219ec8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 28 May 2015 09:48:54 -0600 Subject: NVMe: Use requested sync command timeout Signed-off-by: Keith Busch Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index c42bc53..4eb9917 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1014,7 +1014,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, req->__sector = (sector_t) -1; req->bio = req->biotail = NULL; - req->timeout = ADMIN_TIMEOUT; + req->timeout = timeout ? 
timeout : ADMIN_TIMEOUT; req->cmd = (unsigned char *)cmd; req->cmd_len = sizeof(struct nvme_command); -- cgit v0.10.2 From 75619bfa904d0f2840b4274eb92ce47b2e1c472e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 28 May 2015 09:48:55 -0600 Subject: NVMe: End sync requests immediately on failure Do not retry failed sync commands so the original status may be seen without issuing unnecessary retries. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 4eb9917..6ed1356 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1010,6 +1010,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->cmd_type = REQ_TYPE_DRV_PRIV; + req->cmd_flags = REQ_FAILFAST_DRIVER; req->__data_len = 0; req->__sector = (sector_t) -1; req->bio = req->biotail = NULL; -- cgit v0.10.2 From 42483228d4c019ffc86b8dbea7dfbc3f9566fe7e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 1 Jun 2015 09:29:54 -0600 Subject: NVMe: Remove hctx reliance for multi-namespace The driver needs to track shared tags to support multiple namespaces that may be dynamically allocated or deleted. Relying on the first request_queue's hctx's is not appropriate as we cannot clear outstanding tags for all namespaces using this handle, nor can the driver easily track all request_queue's hctx as namespaces are attached/detached. Instead, this patch uses the nvme_dev's tagset to get the shared tag resources instead of through a request_queue hctx. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 6ed1356..513908f 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -102,6 +102,7 @@ struct nvme_queue { spinlock_t q_lock; struct nvme_command *sq_cmds; volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -114,7 +115,6 @@ struct nvme_queue { u8 cq_phase; u8 cqe_seen; struct async_cmd_info cmdinfo; - struct blk_mq_hw_ctx *hctx; }; /* @@ -182,9 +182,12 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, struct nvme_dev *dev = data; struct nvme_queue *nvmeq = dev->queues[0]; - WARN_ON(nvmeq->hctx); - nvmeq->hctx = hctx; + WARN_ON(hctx_idx != 0); + WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); + WARN_ON(nvmeq->tags); + hctx->driver_data = nvmeq; + nvmeq->tags = &dev->admin_tagset.tags[0]; return 0; } @@ -201,27 +204,16 @@ static int nvme_admin_init_request(void *data, struct request *req, return 0; } -static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nvme_queue *nvmeq = hctx->driver_data; - - nvmeq->hctx = NULL; -} - static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; - struct nvme_queue *nvmeq = dev->queues[ - (hctx_idx % dev->queue_count) + 1]; - - if (!nvmeq->hctx) - nvmeq->hctx = hctx; + struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; - /* nvmeq queues are shared between namespaces. We assume here that - * blk-mq map the tags so they match up with the nvme queue tags. 
*/ - WARN_ON(nvmeq->hctx->tags != hctx->tags); + if (!nvmeq->tags) + nvmeq->tags = &dev->tagset.tags[hctx_idx]; + WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); hctx->driver_data = nvmeq; return 0; } @@ -320,7 +312,7 @@ static void abort_completion(struct nvme_queue *nvmeq, void *ctx, u16 status = le16_to_cpup(&cqe->status) >> 1; u32 result = le32_to_cpup(&cqe->result); - blk_mq_free_hctx_request(nvmeq->hctx, req); + blk_mq_free_request(req); dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); ++nvmeq->dev->abort_limit; @@ -333,14 +325,13 @@ static void async_completion(struct nvme_queue *nvmeq, void *ctx, cmdinfo->result = le32_to_cpup(&cqe->result); cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; queue_kthread_work(cmdinfo->worker, &cmdinfo->work); - blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req); + blk_mq_free_request(cmdinfo->req); } static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq, unsigned int tag) { - struct blk_mq_hw_ctx *hctx = nvmeq->hctx; - struct request *req = blk_mq_tag_to_rq(hctx->tags, tag); + struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag); return blk_mq_rq_to_pdu(req); } @@ -1068,7 +1059,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev) c.common.opcode = nvme_admin_async_event; c.common.command_id = req->tag; - blk_mq_free_hctx_request(nvmeq->hctx, req); + blk_mq_free_request(req); return __nvme_submit_cmd(nvmeq, &c); } @@ -1310,8 +1301,7 @@ static void nvme_abort_req(struct request *req) } } -static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx, - struct request *req, void *data, bool reserved) +static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved) { struct nvme_queue *nvmeq = data; void *ctx; @@ -1408,11 +1398,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) static void nvme_clear_queue(struct nvme_queue *nvmeq) { - struct blk_mq_hw_ctx *hctx = nvmeq->hctx; - spin_lock_irq(&nvmeq->q_lock); - if (hctx && hctx->tags) - blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq); + if (nvmeq->tags && *nvmeq->tags) + blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq); spin_unlock_irq(&nvmeq->q_lock); } @@ -1605,7 +1593,6 @@ static struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, - .exit_hctx = nvme_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; @@ -1614,7 +1601,6 @@ static struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_init_hctx, - .exit_hctx = nvme_exit_hctx, .init_request = nvme_init_request, .timeout = nvme_timeout, }; @@ -2724,11 +2710,11 @@ static void nvme_set_irq_hints(struct nvme_dev *dev) for (i = 0; i < dev->online_queues; i++) { nvmeq = dev->queues[i]; - if (!nvmeq->hctx) + if (!nvmeq->tags || !(*nvmeq->tags)) continue; irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, - nvmeq->hctx->cpumask); + blk_mq_tags_cpumask(*nvmeq->tags)); } } -- cgit v0.10.2 From 419c21a3b6275d40a10901f700efcd40515b6db6 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 2 Jun 2015 08:35:09 +0900 Subject: null_blk: prevent timer handler running on a different CPU where started When irqmode=2 (IRQ completion handler is timer), timer handler should be called on the same CPU where the timer has been started. 
Since completion_queues are per-cpu and the completion handler only touches completion_queue for local CPU, we need to prevent the handler from running on a different CPU where the timer has been started. Otherwise, the IO cannot be completed until another completion handler is executed on that CPU. Signed-off-by: Akinobu Mita Cc: Jens Axboe Signed-off-by: Jens Axboe diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 65cd61a..6f0a58e 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -257,7 +257,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) if (llist_add(&cmd->ll_list, &cq->list)) { ktime_t kt = ktime_set(0, completion_nsec); - hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL); + hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED); } put_cpu(); -- cgit v0.10.2 From 8b70f45e2eb275da886b9c9dee190436d12d876a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 2 Jun 2015 08:35:10 +0900 Subject: null_blk: restart request processing on completion handler When irqmode=2 (IRQ completion handler is timer) and queue_mode=1 (Block interface to use is rq), the completion handler should restart request handling for any pending requests on a queue because request processing stops when the number of commands are queued more than hw_queue_depth (null_rq_prep_fn returns BLKPREP_DEFER). Without this change, the following command cannot finish. # modprobe null_blk irqmode=2 queue_mode=1 hw_queue_depth=1 # fio --name=t --rw=read --size=1g --direct=1 \ --ioengine=libaio --iodepth=64 --filename=/dev/nullb0 Signed-off-by: Akinobu Mita Cc: Jens Axboe Signed-off-by: Jens Axboe diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 6f0a58e..6f9b753 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -243,6 +243,17 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) cmd = container_of(entry, struct nullb_cmd, ll_list); entry = entry->next; end_cmd(cmd); + + if (cmd->rq) { + struct request_queue *q = cmd->rq->q; + + if (!q->mq_ops && blk_queue_stopped(q)) { + spin_lock(q->queue_lock); + if (blk_queue_stopped(q)) + blk_start_queue(q); + spin_unlock(q->queue_lock); + } + } } while (entry); } @@ -334,6 +345,7 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req) req->special = cmd; return BLKPREP_OK; } + blk_stop_queue(q); return BLKPREP_DEFER; } -- cgit v0.10.2 From 4cc06521ee1f153e0d292413a5bff7bbbdee92d0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 5 Jun 2015 10:30:08 -0600 Subject: NVMe: add sysfs and ioctl controller reset We need the ability to perform an nvme controller reset as discussed on the mailing list thread: http://lists.infradead.org/pipermail/linux-nvme/2015-March/001585.html This adds a sysfs entry that when written to will reset perform an NVMe controller reset if the controller was successfully initialized in the first place. This also adds locking around resetting the device in the async probe method so the driver can't schedule two resets. Signed-off-by: Keith Busch Cc: Brandon Schultz Cc: David Sariel Updated by Jens to: 1) Merge this with the ioctl reset patch from David Sariel. The ioctl path now shares the reset code from the sysfs path. 2) Don't flush work if we fail issuing the reset. 
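Both entry points added here are reachable from user space: writing anything to the new reset_controller sysfs attribute (most likely /sys/class/nvme/nvme0/reset_controller, since the file is created on the controller's class device), or issuing the new ioctl on the controller character device. A rough sketch of the ioctl path, assuming /dev/nvme0 is the controller node; the NVME_IOCTL_RESET number matches the uapi definition added below:

#include <fcntl.h>
#include <linux/ioctl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Same value as the NVME_IOCTL_RESET definition added to
 * include/uapi/linux/nvme.h by this patch. */
#define NVME_IOCTL_RESET _IO('N', 0x44)

int main(void)
{
	int fd = open("/dev/nvme0", O_RDWR);	/* controller char device */

	if (fd < 0) {
		perror("open /dev/nvme0");
		return 1;
	}
	/* Per the patch, this returns 0 once the scheduled reset work has
	 * been flushed, -EBUSY if a reset is already pending, and -ENODEV
	 * if the admin queue is gone. */
	if (ioctl(fd, NVME_IOCTL_RESET) < 0) {
		perror("NVME_IOCTL_RESET");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}

The sysfs write handler calls the same nvme_reset() helper, so either interface schedules the reset work and waits for it to complete before returning.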
Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 513908f..9682e29 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -80,6 +80,7 @@ static wait_queue_head_t nvme_kthread_wait; static struct class *nvme_class; static void nvme_reset_failed_dev(struct work_struct *ws); +static int nvme_reset(struct nvme_dev *dev); static int nvme_process_cq(struct nvme_queue *nvmeq); struct async_cmd_info { @@ -2689,6 +2690,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -ENOTTY; ns = list_first_entry(&dev->namespaces, struct nvme_ns, list); return nvme_user_cmd(dev, ns, (void __user *)arg); + case NVME_IOCTL_RESET: + dev_warn(dev->dev, "resetting controller\n"); + return nvme_reset(dev); default: return -ENOTTY; } @@ -2839,6 +2843,44 @@ static void nvme_reset_workfn(struct work_struct *work) dev->reset_workfn(work); } +static int nvme_reset(struct nvme_dev *dev) +{ + int ret = -EBUSY; + + if (!dev->admin_q || blk_queue_dying(dev->admin_q)) + return -ENODEV; + + spin_lock(&dev_list_lock); + if (!work_pending(&dev->reset_work)) { + dev->reset_workfn = nvme_reset_failed_dev; + queue_work(nvme_workq, &dev->reset_work); + ret = 0; + } + spin_unlock(&dev_list_lock); + + if (!ret) { + flush_work(&dev->reset_work); + return 0; + } + + return ret; +} + +static ssize_t nvme_sysfs_reset(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct nvme_dev *ndev = dev_get_drvdata(dev); + int ret; + + ret = nvme_reset(ndev); + if (ret < 0) + return ret; + + return count; +} +static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); + static void nvme_async_probe(struct work_struct *work); static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -2883,12 +2925,20 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } get_device(dev->device); + dev_set_drvdata(dev->device, dev); + + result = device_create_file(dev->device, &dev_attr_reset_controller); + if (result) + goto put_dev; INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; + put_dev: + device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); + put_device(dev->device); release_pools: nvme_release_prp_pools(dev); release: @@ -2919,10 +2969,12 @@ static void nvme_async_probe(struct work_struct *work) nvme_set_irq_hints(dev); return; reset: + spin_lock(&dev_list_lock); if (!work_busy(&dev->reset_work)) { dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); } + spin_unlock(&dev_list_lock); } static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@ -2952,6 +3004,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->probe_work); flush_work(&dev->reset_work); + device_remove_file(dev->device, &dev_attr_reset_controller); nvme_dev_shutdown(dev); nvme_dev_remove(dev); nvme_dev_remove_admin(dev); diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index aef9a81..b660dc2 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -579,5 +579,6 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) #endif /* _UAPI_LINUX_NVME_H */ -- cgit 
v0.10.2 From 36a7e993eedb2c3f11de3b686b351f75e1edbbb5 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 27 May 2015 12:26:23 -0600 Subject: NVMe: Memory barrier before queue_count is incremented Protects against reordering and/or preempting which would allow the kthread to access the queue descriptor before it is set up Signed-off-by: Jon Derrick Acked-by: Keith Busch Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 9682e29..cae7cac 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1453,9 +1453,12 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->qid = qid; - dev->queue_count++; dev->queues[qid] = nvmeq; + /* make sure queue descriptor is set before queue count, for kthread */ + mb(); + dev->queue_count++; + return nvmeq; free_cqdma: -- cgit v0.10.2 From a5768aa887fb636f0cc4c83a2f1242506aaf50f6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 1 Jun 2015 14:28:14 -0600 Subject: NVMe: Automatic namespace rescan Namespaces may be dynamically allocated and deleted or attached and detached. This has the driver rescan the device for namespace changes after each device reset or namespace change asynchronous event. There could potentially be many detached namespaces that we don't want polluting /dev/ with unusable block handles, so this will delete disks if the namespace is not active as indicated by the response from identify namespace. This also skips adding the disk if no capacity is provisioned to the namespace in the first place. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index cae7cac..2072ae8 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -300,9 +301,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) ++nvmeq->dev->event_limit; - if (status == NVME_SC_SUCCESS) - dev_warn(nvmeq->q_dmadev, - "async event result %08x\n", result); + if (status != NVME_SC_SUCCESS) + return; + + switch (result & 0xff07) { + case NVME_AER_NOTICE_NS_CHANGED: + dev_info(nvmeq->q_dmadev, "rescanning\n"); + schedule_work(&nvmeq->dev->scan_work); + default: + dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result); + } } static void abort_completion(struct nvme_queue *nvmeq, void *ctx, @@ -1923,8 +1931,13 @@ static int nvme_revalidate_disk(struct gendisk *disk) unsigned short bs; if (nvme_identify_ns(dev, ns->ns_id, &id)) { - dev_warn(dev->dev, "%s: Identify failure\n", __func__); - return 0; + dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__, + dev->instance, ns->ns_id); + return -ENODEV; + } + if (id->ncap == 0) { + kfree(id); + return -ENODEV; } old_ms = ns->ms; @@ -1958,7 +1971,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) !ns->ext) nvme_init_integrity(ns); - if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk))) + if (ns->ms && !blk_get_integrity(disk)) set_capacity(disk, 0); else set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); @@ -2073,11 +2086,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) * requires it. 
*/ set_capacity(disk, 0); - nvme_revalidate_disk(ns->disk); + if (nvme_revalidate_disk(ns->disk)) + goto out_free_disk; + add_disk(ns->disk); if (ns->ms) revalidate_disk(ns->disk); return; + out_free_disk: + kfree(disk); + list_del(&ns->list); out_free_queue: blk_cleanup_queue(ns->queue); out_free_ns: @@ -2194,6 +2212,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return result; } +static void nvme_free_namespace(struct nvme_ns *ns) +{ + list_del(&ns->list); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + put_disk(ns->disk); + kfree(ns); +} + +static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); + struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); + + return nsa->ns_id - nsb->ns_id; +} + +static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid) +{ + struct nvme_ns *ns; + + list_for_each_entry(ns, &dev->namespaces, list) { + if (ns->ns_id == nsid) + return ns; + if (ns->ns_id > nsid) + break; + } + return NULL; +} + +static inline bool nvme_io_incapable(struct nvme_dev *dev) +{ + return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS || + dev->online_queues < 2); +} + +static void nvme_ns_remove(struct nvme_ns *ns) +{ + bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue); + + if (kill) + blk_set_queue_dying(ns->queue); + if (ns->disk->flags & GENHD_FL_UP) { + if (blk_get_integrity(ns->disk)) + blk_integrity_unregister(ns->disk); + del_gendisk(ns->disk); + } + if (kill || !blk_queue_dying(ns->queue)) { + blk_mq_abort_requeue_list(ns->queue); + blk_cleanup_queue(ns->queue); + } +} + +static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) +{ + struct nvme_ns *ns, *next; + unsigned i; + + for (i = 1; i <= nn; i++) { + ns = nvme_find_ns(dev, i); + if (ns) { + if (revalidate_disk(ns->disk)) { + nvme_ns_remove(ns); + nvme_free_namespace(ns); + } + } else + nvme_alloc_ns(dev, i); + } + list_for_each_entry_safe(ns, next, &dev->namespaces, list) { + if (ns->ns_id > nn) { + nvme_ns_remove(ns); + nvme_free_namespace(ns); + } + } + list_sort(NULL, &dev->namespaces, ns_cmp); +} + +static void nvme_dev_scan(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); + struct nvme_id_ctrl *ctrl; + + if (!dev->tagset.tags) + return; + if (nvme_identify_ctrl(dev, &ctrl)) + return; + nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn)); + kfree(ctrl); +} + /* * Return: error value if an error occurred setting up the queues or calling * Identify Device. 
0 if these succeeded, even if adding some of the @@ -2204,7 +2315,7 @@ static int nvme_dev_add(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); int res; - unsigned nn, i; + unsigned nn; struct nvme_id_ctrl *ctrl; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; @@ -2250,9 +2361,7 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; - for (i = 1; i <= nn; i++) - nvme_alloc_ns(dev, i); - + schedule_work(&dev->scan_work); return 0; } @@ -2552,17 +2661,8 @@ static void nvme_dev_remove(struct nvme_dev *dev) { struct nvme_ns *ns; - list_for_each_entry(ns, &dev->namespaces, list) { - if (ns->disk->flags & GENHD_FL_UP) { - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); - del_gendisk(ns->disk); - } - if (!blk_queue_dying(ns->queue)) { - blk_mq_abort_requeue_list(ns->queue); - blk_cleanup_queue(ns->queue); - } - } + list_for_each_entry(ns, &dev->namespaces, list) + nvme_ns_remove(ns); } static int nvme_setup_prp_pools(struct nvme_dev *dev) @@ -2621,16 +2721,8 @@ static void nvme_free_namespaces(struct nvme_dev *dev) { struct nvme_ns *ns, *next; - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { - list_del(&ns->list); - - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - - put_disk(ns->disk); - kfree(ns); - } + list_for_each_entry_safe(ns, next, &dev->namespaces, list) + nvme_free_namespace(ns); } static void nvme_free_dev(struct kref *kref) @@ -2814,6 +2906,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } else { nvme_unfreeze_queues(dev); + schedule_work(&dev->scan_work); nvme_set_irq_hints(dev); } return 0; @@ -2935,6 +3028,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto put_dev; INIT_LIST_HEAD(&dev->node); + INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; @@ -3007,6 +3101,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->probe_work); flush_work(&dev->reset_work); + flush_work(&dev->scan_work); device_remove_file(dev->device, &dev_attr_reset_controller); nvme_dev_shutdown(dev); nvme_dev_remove(dev); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 986bf8a..c0d94ed 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -92,6 +92,7 @@ struct nvme_dev { work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; + struct work_struct scan_work; char name[12]; char serial[20]; char model[40]; diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index b660dc2..732b32e 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -179,6 +179,10 @@ enum { NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, }; +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x0002, +}; + struct nvme_lba_range_type { __u8 type; __u8 attributes; -- cgit v0.10.2 From e7bdd17b0869782d89c371507ee45bb1425615a0 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Wed, 10 Jun 2015 18:00:17 +0000 Subject: block/ps3vram: Fix sparse warnings Fix sparse warnings like these: drivers/block/ps3vram.c: warning: incorrect type in assignment (different address spaces) drivers/block/ps3vram.c: expected unsigned int [usertype] *ctrl drivers/block/ps3vram.c: got void [noderef] * Cc: Jim Paris Cc: Jens Axboe Signed-off-by: Geoff Levand Acked-by: Jim Paris Signed-off-by: Jens Axboe diff --git a/drivers/block/ps3vram.c 
b/drivers/block/ps3vram.c index ef45cfb..a7bf836 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -73,8 +73,8 @@ struct ps3vram_priv { u64 memory_handle; u64 context_handle; - u32 *ctrl; - void *reports; + u32 __iomem *ctrl; + void __iomem *reports; u8 *xdr_buf; u32 *fifo_base; @@ -104,7 +104,7 @@ static char *size = "256M"; module_param(size, charp, 0); MODULE_PARM_DESC(size, "memory size"); -static u32 *ps3vram_get_notifier(void *reports, int notifier) +static u32 __iomem *ps3vram_get_notifier(void __iomem *reports, int notifier) { return reports + DMA_NOTIFIER_OFFSET_BASE + DMA_NOTIFIER_SIZE * notifier; @@ -113,22 +113,22 @@ static u32 *ps3vram_get_notifier(void *reports, int notifier) static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); - u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER); + u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER); int i; for (i = 0; i < 4; i++) - notify[i] = 0xffffffff; + iowrite32be(0xffffffff, notify + i); } static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev, unsigned int timeout_ms) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); - u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER); + u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER); unsigned long timeout; for (timeout = 20; timeout; timeout--) { - if (!notify[3]) + if (!ioread32be(notify + 3)) return 0; udelay(10); } @@ -136,7 +136,7 @@ static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev, timeout = jiffies + msecs_to_jiffies(timeout_ms); do { - if (!notify[3]) + if (!ioread32be(notify + 3)) return 0; msleep(1); } while (time_before(jiffies, timeout)); @@ -148,8 +148,8 @@ static void ps3vram_init_ring(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); - priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET; - priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET; + iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT); + iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_GET); } static int ps3vram_wait_ring(struct ps3_system_bus_device *dev, @@ -159,14 +159,14 @@ static int ps3vram_wait_ring(struct ps3_system_bus_device *dev, unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); do { - if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET]) + if (ioread32be(priv->ctrl + CTRL_PUT) == ioread32be(priv->ctrl + CTRL_GET)) return 0; msleep(1); } while (time_before(jiffies, timeout)); dev_warn(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n", - priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET], - priv->ctrl[CTRL_TOP]); + ioread32be(priv->ctrl + CTRL_PUT), ioread32be(priv->ctrl + CTRL_GET), + ioread32be(priv->ctrl + CTRL_TOP)); return -ETIMEDOUT; } @@ -189,7 +189,7 @@ static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev) ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET)); - priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET; + iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT); /* asking the HV for a blit will kick the FIFO */ status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0); @@ -207,8 +207,8 @@ static void ps3vram_fire_ring(struct ps3_system_bus_device *dev) mutex_lock(&ps3_gpu_mutex); - priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET + - (priv->fifo_ptr - priv->fifo_base) * sizeof(u32); + iowrite32be(FIFO_BASE + FIFO_OFFSET + (priv->fifo_ptr - priv->fifo_base) + * sizeof(u32), priv->ctrl + CTRL_PUT); /* asking the HV for a blit will kick 
the FIFO */ status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0); -- cgit v0.10.2 From de667203fdbb77745ce7baa9ed280d2cc27b1753 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 10 Jun 2015 18:00:17 +0000 Subject: block/ps3vram: Remove obsolete reference to MTD The ps3vram driver has been a plain block device driver since commit f507cd22035fdadd5dbb476dd05e9e7ee21c3b84 ("ps3/block: Replace mtd/ps3vram by block/ps3vram"). Signed-off-by: Geert Uytterhoeven Signed-off-by: Geoff Levand Acked-by: Jim Paris Signed-off-by: Jens Axboe diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index a7bf836..b1612eb 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -1,5 +1,5 @@ /* - * ps3vram - Use extra PS3 video ram as MTD block device. + * ps3vram - Use extra PS3 video ram as block device. * * Copyright 2009 Sony Corporation * -- cgit v0.10.2 From 3715a5d014e1326b8e6d008dfbf05615014a067e Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Wed, 10 Jun 2015 18:00:18 +0000 Subject: MAINTAINERS: Update ps3vram block driver Add myself as co-maintainer of the ps3vram block driver, and add linuxppc-dev as a relevant mailing list. I have been acting as maintainer of this driver for the last several years, and if there is some inquiry regarding it I would like to be notified. Signed-off-by: Geoff Levand Acked-by: Jim Paris Signed-off-by: Jens Axboe diff --git a/MAINTAINERS b/MAINTAINERS index 781e099..ffcb4e5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7852,6 +7852,8 @@ F: sound/ppc/snd_ps3* PS3VRAM DRIVER M: Jim Paris +M: Geoff Levand +L: linuxppc-dev@lists.ozlabs.org L: cbe-oss-dev@lists.ozlabs.org S: Maintained F: drivers/block/ps3vram.c -- cgit v0.10.2 From 02b48265e7437bfe153af16337b14ee74f00905f Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Mon, 11 May 2015 15:48:00 -0700 Subject: mtip32xx: fix rmmod issue put_disk() needs to be called after del_gendisk() to free the disk object structure.
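To spell out the ordering this fix moves to, here is a minimal, hypothetical remove-path sketch; the example_* names are illustrative rather than mtip32xx code, and it assumes the blk-mq interfaces already used elsewhere in this series.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/genhd.h>

/* Hypothetical per-device state, standing in for struct driver_data. */
struct example_dev {
        struct gendisk *disk;
        struct request_queue *queue;
        struct blk_mq_tag_set tags;
};

static void example_block_remove(struct example_dev *dd)
{
        /* 1. Unhook the gendisk from userspace first. */
        del_gendisk(dd->disk);

        /* 2. Then drain and free the request queue and tag set. */
        if (dd->disk->queue) {
                blk_cleanup_queue(dd->queue);
                blk_mq_free_tag_set(&dd->tags);
                dd->queue = NULL;
        }

        /* 3. Only now drop the last reference so the disk object is freed. */
        put_disk(dd->disk);
        dd->disk = NULL;
}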
Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3bd7ca9..b79b59a 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2809,6 +2809,7 @@ static int mtip_free_orphan(struct driver_data *dd) kobject_put(kobj); } del_gendisk(dd->disk); + put_disk(dd->disk); dd->disk = NULL; } if (dd->queue) { @@ -4095,13 +4096,13 @@ static int mtip_block_remove(struct driver_data *dd) dd->bdev = NULL; } if (dd->disk) { + del_gendisk(dd->disk); if (dd->disk->queue) { - del_gendisk(dd->disk); blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); dd->queue = NULL; - } else - put_disk(dd->disk); + } + put_disk(dd->disk); } dd->disk = NULL; @@ -4140,12 +4141,12 @@ static int mtip_block_shutdown(struct driver_data *dd) dev_info(&dd->pdev->dev, "Shutting down %s ...\n", dd->disk->disk_name); + del_gendisk(dd->disk); if (dd->disk->queue) { - del_gendisk(dd->disk); blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); - } else - put_disk(dd->disk); + } + put_disk(dd->disk); dd->disk = NULL; dd->queue = NULL; } -- cgit v0.10.2 From a7806fadc5f68b1551e4fa85f5e655c0448727f1 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Mon, 11 May 2015 15:49:28 -0700 Subject: mtip32xx: remove unused variable 'port->allocated' Remove unused variable 'port->allocated' Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index b79b59a..0dd5d76 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -623,8 +623,7 @@ static void mtip_handle_tfe(struct driver_data *dd) set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_TAG_INTERNAL, port->allocated)) { + if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); @@ -2625,18 +2624,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, readl(dd->mmio + HOST_IRQ_STAT)); size += sprintf(&buf[size], "\n"); - size += sprintf(&buf[size], "L/ Allocated : [ 0x"); - - for (n = dd->slot_groups-1; n >= 0; n--) { - if (sizeof(long) > sizeof(u32)) - group_allocated = - dd->port->allocated[n/2] >> (32*(n&1)); - else - group_allocated = dd->port->allocated[n]; - size += sprintf(&buf[size], "%08X ", group_allocated); - } - size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "L/ Commands in Q : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) { diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index ba1b31e..d7a5459 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -412,19 +412,13 @@ struct mtip_port { * by the DMA when the driver issues internal commands. */ dma_addr_t sector_buffer_dma; - /* - * Bit significant, used to determine if a command slot has - * been allocated. i.e. the slot is in use. Bits are cleared - * when the command slot and all associated data structures - * are no longer needed. 
- */ + u16 *log_buf; dma_addr_t log_buf_dma; u8 *smart_buf; dma_addr_t smart_buf_dma; - unsigned long allocated[SLOTBITS_IN_LONGS]; /* * used to queue commands when an internal command is in progress * or error handling is active -- cgit v0.10.2 From ee04bed690cb49a49512a641405bac42d13c2b2a Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Mon, 11 May 2015 15:50:50 -0700 Subject: mtip32xx: fix incorrectly setting MTIP_DDF_SEC_LOCK_BIT Fix incorrectly setting MTIP_DDF_SEC_LOCK_BIT Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 0dd5d76..e905c81 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -990,15 +990,11 @@ static bool mtip_pause_ncq(struct mtip_port *port, reply = port->rxfis + RX_FIS_D2H_REG; task_file_data = readl(port->mmio+PORT_TFDATA); - if (fis->command == ATA_CMD_SEC_ERASE_UNIT) - clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); - if ((task_file_data & 1)) return false; if (fis->command == ATA_CMD_SEC_ERASE_PREP) { set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); - set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); port->ic_pause_timer = jiffies; return true; } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) && @@ -1010,6 +1006,7 @@ static bool mtip_pause_ncq(struct mtip_port *port, ((fis->command == 0xFC) && (fis->features == 0x27 || fis->features == 0x72 || fis->features == 0x62 || fis->features == 0x26))) { + clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); /* Com reset after secure erase or lowlevel format */ mtip_restart_port(port); return false; -- cgit v0.10.2 From 686d8e0bb5207c2a651eb5b28ac15db33adda59d Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Mon, 11 May 2015 15:51:27 -0700 Subject: mtip32xx: Abort I/O during secure erase operation Currently, I/Os are queued when a secure erase operation starts and are issued after the operation completes. As all data will be gone when the operation completes, any queued I/O doesn't make sense. Hence, abort I/O (return -ENODATA) as soon as the driver receives it. Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index e905c81..92cb601 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -994,7 +994,6 @@ static bool mtip_pause_ncq(struct mtip_port *port, return false; if (fis->command == ATA_CMD_SEC_ERASE_PREP) { - set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); port->ic_pause_timer = jiffies; return true; } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) && @@ -1009,6 +1008,7 @@ static bool mtip_pause_ncq(struct mtip_port *port, clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); /* Com reset after secure erase or lowlevel format */ mtip_restart_port(port); + clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); return false; } @@ -1108,9 +1108,10 @@ static int mtip_exec_internal_command(struct mtip_port *port, int_cmd = mtip_get_int_command(dd); set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); - port->ic_pause_timer = 0; - clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + if (fis->command == ATA_CMD_SEC_ERASE_PREP) + set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); if (atomic == GFP_KERNEL) { @@ -1247,11 +1248,11 @@ static int mtip_exec_internal_command(struct mtip_port *port, exec_ic_exit: /* Clear the allocated and active bits for the internal command.
*/ mtip_put_int_command(dd, int_cmd); + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); if (rv >= 0 && mtip_pause_ncq(port, fis)) { /* NCQ paused */ return rv; } - clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); return rv; @@ -3684,6 +3685,26 @@ static const struct block_device_operations mtip_block_ops = { .owner = THIS_MODULE }; +static inline bool is_se_active(struct driver_data *dd) +{ + if (unlikely(test_bit(MTIP_PF_SE_ACTIVE_BIT, &dd->port->flags))) { + if (dd->port->ic_pause_timer) { + unsigned long to = dd->port->ic_pause_timer + + msecs_to_jiffies(1000); + if (time_after(jiffies, to)) { + clear_bit(MTIP_PF_SE_ACTIVE_BIT, + &dd->port->flags); + clear_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag); + dd->port->ic_pause_timer = 0; + wake_up_interruptible(&dd->port->svc_wait); + return false; + } + } + return true; + } + return false; +} + /* * Block layer make request function. * @@ -3701,6 +3722,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); unsigned int nents; + if (is_se_active(dd)) + return -ENODATA; + if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) { -- cgit v0.10.2 From 2132a544727eb17f76bfef8b550a016a41c38821 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Mon, 11 May 2015 15:53:18 -0700 Subject: mtip32xx: fix crash on surprise removal of the drive The pci and block layers have changed a lot compared to when SRSI support was added. Given the current state of the pci and block layers, this driver does not have to do any specific handling. Signed-off-by: Asai Thambi S P Signed-off-by: Selvan Mani Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 92cb601..0c429b5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -895,6 +895,10 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) /* Acknowledge the interrupt status on the port.*/ port_stat = readl(port->mmio + PORT_IRQ_STAT); + if (unlikely(port_stat == 0xFFFFFFFF)) { + mtip_check_surprise_removal(dd->pdev); + return IRQ_HANDLED; + } writel(port_stat, port->mmio + PORT_IRQ_STAT); /* Demux port status */ @@ -2765,49 +2769,6 @@ static void mtip_hw_debugfs_exit(struct driver_data *dd) debugfs_remove_recursive(dd->dfs_node); } -static int mtip_free_orphan(struct driver_data *dd) -{ - struct kobject *kobj; - - if (dd->bdev) { - if (dd->bdev->bd_holders >= 1) - return -2; - - bdput(dd->bdev); - dd->bdev = NULL; - } - - mtip_hw_debugfs_exit(dd); - - spin_lock(&rssd_index_lock); - ida_remove(&rssd_index_ida, dd->index); - spin_unlock(&rssd_index_lock); - - if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) && - test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) { - put_disk(dd->disk); - } else { - if (dd->disk) { - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_exit(dd, kobj); - kobject_put(kobj); - } - del_gendisk(dd->disk); - put_disk(dd->disk); - dd->disk = NULL; - } - if (dd->queue) { - dd->queue->queuedata = NULL; - blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); - dd->queue = NULL; - } - } - kfree(dd); - return 0; -} - /* * Perform any init/resume time hardware setup * @@ -2955,7 +2916,6 @@ static int mtip_service_thread(void *data) unsigned long slot, slot_start, slot_wrap; unsigned int num_cmd_slots = dd->slot_groups * 32; struct mtip_port *port = dd->port; - int ret; while (1) { if
(kthread_should_stop() || @@ -3041,18 +3001,6 @@ restart_eh: if (kthread_should_stop()) goto st_out; } - - while (1) { - ret = mtip_free_orphan(dd); - if (!ret) { - /* NOTE: All data structures are invalid, do not - * access any here */ - return 0; - } - msleep_interruptible(1000); - if (kthread_should_stop()) - goto st_out; - } st_out: return 0; } @@ -3380,6 +3328,7 @@ static int mtip_hw_exit(struct driver_data *dd) /* Release the IRQ. */ irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); + msleep(1000); /* Free dma regions */ mtip_dma_free(dd); @@ -4075,52 +4024,51 @@ static int mtip_block_remove(struct driver_data *dd) { struct kobject *kobj; - if (!dd->sr) { - mtip_hw_debugfs_exit(dd); + mtip_hw_debugfs_exit(dd); - if (dd->mtip_svc_handler) { - set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); - wake_up_interruptible(&dd->port->svc_wait); - kthread_stop(dd->mtip_svc_handler); - } + if (dd->mtip_svc_handler) { + set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); + wake_up_interruptible(&dd->port->svc_wait); + kthread_stop(dd->mtip_svc_handler); + } - /* Clean up the sysfs attributes, if created */ - if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) { - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_exit(dd, kobj); - kobject_put(kobj); - } + /* Clean up the sysfs attributes, if created */ + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) { + kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); + if (kobj) { + mtip_hw_sysfs_exit(dd, kobj); + kobject_put(kobj); } + } + if (!dd->sr) mtip_standby_drive(dd); - - /* - * Delete our gendisk structure. This also removes the device - * from /dev - */ - if (dd->bdev) { - bdput(dd->bdev); - dd->bdev = NULL; - } - if (dd->disk) { - del_gendisk(dd->disk); - if (dd->disk->queue) { - blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); - dd->queue = NULL; - } - put_disk(dd->disk); - } - dd->disk = NULL; - - spin_lock(&rssd_index_lock); - ida_remove(&rssd_index_ida, dd->index); - spin_unlock(&rssd_index_lock); - } else { + else dev_info(&dd->pdev->dev, "device %s surprise removal\n", dd->disk->disk_name); + + /* + * Delete our gendisk structure. This also removes the device + * from /dev + */ + if (dd->bdev) { + bdput(dd->bdev); + dd->bdev = NULL; + } + if (dd->disk) { + del_gendisk(dd->disk); + if (dd->disk->queue) { + blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + dd->queue = NULL; + } + put_disk(dd->disk); } + dd->disk = NULL; + + spin_lock(&rssd_index_lock); + ida_remove(&rssd_index_ida, dd->index); + spin_unlock(&rssd_index_lock); /* De-initialize the protocol layer. */ mtip_hw_exit(dd); @@ -4516,6 +4464,7 @@ static void mtip_pci_remove(struct pci_dev *pdev) "Completion workers still active!\n"); } + blk_mq_stop_hw_queues(dd->queue); /* Clean up the block layer. 
*/ mtip_block_remove(dd); @@ -4533,10 +4482,8 @@ static void mtip_pci_remove(struct pci_dev *pdev) list_del_init(&dd->remove_list); spin_unlock_irqrestore(&dev_lock, flags); - if (!dd->sr) - kfree(dd); - else - set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag); + kfree(dd); + set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag); pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); pci_set_drvdata(pdev, NULL); -- cgit v0.10.2 From 284eb9a202a24fec4aed02d7526abc29827f6cbb Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Mon, 11 May 2015 15:54:19 -0700 Subject: mtip32xx: remove unnecessary sleep in mtip_ftl_rebuild_poll() Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 0c429b5..3781250 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2891,7 +2891,6 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) mtip_block_initialize(dd); return 0; } - ssleep(10); } while (time_before(jiffies, timeout)); /* Check for timeout */ -- cgit v0.10.2 From 75787265d61fdce212c45805b36779754392d034 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Mon, 11 May 2015 15:55:26 -0700 Subject: mtip32xx: fix minor number When a device is surprise removed and inserted, it is assigned a new minor number because the driver uses multiples of the 'instance' number. Modified to use multiples of 'index' for the minor number. Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3781250..0b223e3 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3857,7 +3857,8 @@ static int mtip_block_initialize(struct driver_data *dd) dd->disk->driverfs_dev = &dd->pdev->dev; dd->disk->major = dd->major; - dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS; + dd->disk->first_minor = index * MTIP_MAX_MINORS; + dd->disk->minors = MTIP_MAX_MINORS; dd->disk->fops = &mtip_block_ops; dd->disk->private_data = dd; dd->index = index; -- cgit v0.10.2 From 2f17d71dd71fe62957f155eee028c6ba79c79f01 Mon Sep 17 00:00:00 2001 From: Asai Thambi SP Date: Mon, 11 May 2015 15:57:16 -0700 Subject: mtip32xx: increase wait time for hba reset In LUN failure conditions, the device takes longer to complete the hba reset. Increased the wait time from 2 seconds to 10 seconds. Signed-off-by: Sam Bradshaw Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 0b223e3..144e7d9 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -269,8 +269,11 @@ static int mtip_hba_reset(struct driver_data *dd) /* Flush */ readl(dd->mmio + HOST_CTL); - /* Spin for up to 2 seconds, waiting for reset acknowledgement */ - timeout = jiffies + msecs_to_jiffies(2000); + /* + * Spin for up to 10 seconds waiting for reset acknowledgement.
Spec + * is 1 sec but in LUN failure conditions, up to 10 secs are required + */ + timeout = jiffies + msecs_to_jiffies(10000); do { mdelay(10); if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) -- cgit v0.10.2 From e112af0dc9f55099b948e55077504a44b4162c79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 5 Jun 2015 14:54:24 +0200 Subject: nvme: don't overwrite req->cmd_flags on sync cmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In __nvme_submit_sync_cmd, the request direction is overwritten when the REQ_FAILFAST_DRIVER flag is set. Signed-off-by: Matias Bjørling Reviewed-by: Christoph Hellwig Fixes: 75619bfa904d0 ("NVMe: End sync requests immediately on failure") Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 2072ae8..12d5b7b 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1010,7 +1010,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->cmd_type = REQ_TYPE_DRV_PRIV; - req->cmd_flags = REQ_FAILFAST_DRIVER; + req->cmd_flags |= REQ_FAILFAST_DRIVER; req->__data_len = 0; req->__sector = (sector_t) -1; req->bio = req->biotail = NULL; -- cgit v0.10.2 From 71feb364e7faadc681e714f7fdc2bede208ba26c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 19 Jun 2015 11:07:30 -0600 Subject: NVMe: Fix IO for extended metadata formats This fixes io submit ioctl handling when using extended metadata formats. When these formats are used, the user provides a single virtually contiguous buffer containing both the block and metadata interleaved, so the metadata size needs to be added to the total length and not mapped as a separate transfer. The command is also driver generated, so this patch does not enforce that blk-integrity extensions provide the metadata buffer. Reported-by: Marcin Dziegielewski Signed-off-by: Keith Busch Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 12d5b7b..a501d3e 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -852,7 +852,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * stripped/generated by the controller with PRACT=1. */ if (ns && ns->ms && !blk_integrity_rq(req)) { - if (!(ns->pi_type && ns->ms == 8)) { + if (!(ns->pi_type && ns->ms == 8) && + req->cmd_type != REQ_TYPE_DRV_PRIV) { req->errors = -EFAULT; blk_mq_complete_request(req); return BLK_MQ_RQ_QUEUE_OK; @@ -1747,15 +1748,14 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len = (io.nblocks + 1) * ns->ms; write = io.opcode & 1; + if (ns->ext) { + length += meta_len; + meta_len = 0; + } if (meta_len) { if (((io.metadata & 3) || !io.metadata) && !ns->ext) return -EINVAL; - if (ns->ext) { - length += meta_len; - meta_len = 0; - } - meta = dma_alloc_coherent(dev->dev, meta_len, &meta_dma, GFP_KERNEL); if (!meta) { -- cgit v0.10.2 From 51ef72bda70841fddd595142ed0e7e0fc571c500 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 20 Jun 2015 16:29:14 +0800 Subject: block: nvme-scsi: Catch kcalloc failure The res variable was initialized to -ENOMEM, but it's overridden by nvme_trans_copy_from_user(). So the current code returns 0 if kcalloc fails. Fix it to return a proper error code.
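The bug class is easy to reproduce, so a small hypothetical sketch of the corrected pattern may help (names are illustrative, not taken from nvme-scsi.c): the error code is set at the allocation failure site instead of relying on a pre-initialized value that an earlier call can overwrite.

#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/types.h>

static int example_unmap(void __user *ubuf, size_t len, unsigned int ndesc)
{
        u32 *range;
        void *plist;
        int res;

        plist = kmalloc(len, GFP_KERNEL);
        if (!plist)
                return -ENOMEM;

        /* This call decides res; any earlier -ENOMEM pre-init would be lost here. */
        res = copy_from_user(plist, ubuf, len) ? -EFAULT : 0;
        if (res)
                goto out;

        range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
        if (!range) {
                res = -ENOMEM;  /* set explicitly, otherwise a stale 0 leaks out */
                goto out;
        }
        /* ... translate descriptors and issue the command ... */
        kfree(range);
out:
        kfree(plist);
        return res;
}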
Signed-off-by: Axel Lin Signed-off-by: Jens Axboe diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 8e6223e..ab6d1a0 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -2375,7 +2375,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct scsi_unmap_parm_list *plist; struct nvme_dsm_range *range; struct nvme_command c; - int i, nvme_sc, res = -ENOMEM; + int i, nvme_sc, res; u16 ndesc, list_len; list_len = get_unaligned_be16(&cmd[7]); @@ -2397,8 +2397,10 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, } range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL); - if (!range) + if (!range) { + res = -ENOMEM; goto out; + } for (i = 0; i < ndesc; i++) { range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb)); -- cgit v0.10.2 From 98f57c5196f7a1f681246858f5860c1120d01ca9 Mon Sep 17 00:00:00 2001 From: Selvan Mani Date: Wed, 24 Jun 2015 08:48:46 -0600 Subject: mtip32xx: Fix accessing freed memory In mtip_pci_remove(), driver data 'dd' is accessed after freeing it. This is a residue of SRSI code cleanup in the patch 016a41c38821 "mtip32xx: fix crash on surprise removal of the drive". Removed the bit flags MTIP_DDF_REMOVE_DONE_BIT and MTIP_PF_SR_CLEANUP_BIT. Reported-by: Julia Lawall Signed-off-by: Vignesh Gunasekaran Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 144e7d9..4a2ef09 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -163,12 +163,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) else dev_warn(&dd->pdev->dev, "%s: dd->queue is NULL\n", __func__); - if (dd->port) { - set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags); - wake_up_interruptible(&dd->port->svc_wait); - } else - dev_warn(&dd->pdev->dev, - "%s: dd->port is NULL\n", __func__); return true; /* device removed */ } @@ -2938,10 +2932,6 @@ static int mtip_service_thread(void *data) test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) goto st_out; - /* If I am an orphan, start self cleanup */ - if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags)) - break; - if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) goto st_out; @@ -2995,14 +2985,6 @@ restart_eh: } } - /* wait for pci remove to exit */ - while (1) { - if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag)) - break; - msleep_interruptible(1000); - if (kthread_should_stop()) - goto st_out; - } st_out: return 0; } @@ -4486,7 +4468,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) spin_unlock_irqrestore(&dev_lock, flags); kfree(dd); - set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag); pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); pci_set_drvdata(pdev, NULL); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index d7a5459..3274784 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -142,7 +142,6 @@ enum { MTIP_PF_SVC_THD_ACTIVE_BIT = 4, MTIP_PF_ISSUE_CMDS_BIT = 5, MTIP_PF_REBUILD_BIT = 6, - MTIP_PF_SR_CLEANUP_BIT = 7, MTIP_PF_SVC_THD_STOP_BIT = 8, /* below are bit numbers in 'dd_flag' defined in driver_data */ @@ -150,7 +149,6 @@ enum { MTIP_DDF_REMOVE_PENDING_BIT = 1, MTIP_DDF_OVER_TEMP_BIT = 2, MTIP_DDF_WRITE_PROTECT_BIT = 3, - MTIP_DDF_REMOVE_DONE_BIT = 4, MTIP_DDF_CLEANUP_BIT = 5, MTIP_DDF_RESUME_BIT = 6, MTIP_DDF_INIT_DONE_BIT = 7, -- cgit v0.10.2
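As a closing note on the bug class fixed above, a hypothetical PCI remove path shows the general rule the patch restores: every access to the driver data must happen before the final kfree(). The example_* names are illustrative; the patch itself simply drops the post-free flag access rather than reordering it.

#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/bitops.h>

/* Hypothetical driver data and flag, standing in for struct driver_data. */
struct example_dd {
        unsigned long flags;
};

#define EXAMPLE_REMOVE_DONE_BIT 0

static void example_pci_remove(struct pci_dev *pdev)
{
        struct example_dd *dd = pci_get_drvdata(pdev);

        /* Any bookkeeping on dd happens while the allocation is still live... */
        set_bit(EXAMPLE_REMOVE_DONE_BIT, &dd->flags);

        /* ...and freeing it is the very last time dd is touched. */
        kfree(dd);
        pci_set_drvdata(pdev, NULL);
}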