From 8757ad65d30f009fe0beeb2d70d3cd834cb998f2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 11 Apr 2014 10:37:39 -0400 Subject: NVMe: Update copyright headers Make the copyright dates accurate and remove the final paragraph that includes the address of the FSF. Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 7c64fa7..eacd64e 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #include diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 2c3f5be..da3b252 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1,6 +1,6 @@ /* * NVM Express device driver - * Copyright (c) 2011, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ /* diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a50173c..cfd084c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1,6 +1,6 @@ /* * Definitions for the NVM Express interface - * Copyright (c) 2011-2013, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef _LINUX_NVME_H diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index 096fe1c..ad9014e 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -1,6 +1,6 @@ /* * Definitions for the NVM Express interface - * Copyright (c) 2011-2013, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef _UAPI_LINUX_NVME_H -- cgit v0.10.2 From 27e8166c31656f7780e682eaf6bc9c3b8dd03253 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 11 Apr 2014 11:58:45 -0400 Subject: NVMe: Improve error messages Help people diagnose what is going wrong at initialisation time by printing out which command has gone wrong and what the device returned. Also fix the error message printed while waiting for reset. Signed-off-by: Matthew Wilcox Reviewed-by: Keith Busch diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index eacd64e..074e982 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1354,7 +1354,8 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) return -EINTR; if (time_after(jiffies, timeout)) { dev_err(&dev->pci_dev->dev, - "Device not ready; aborting initialisation\n"); + "Device not ready; aborting %s\n", enabled ? + "initialisation" : "reset"); return -ENODEV; } } @@ -2058,8 +2059,13 @@ static int set_queue_count(struct nvme_dev *dev, int count) status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, &result); - if (status) - return status < 0 ? -EIO : -EBUSY; + if (status < 0) + return status; + if (status > 0) { + dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", + status); + return -EBUSY; + } return min(result & 0xffff, result >> 16) + 1; } @@ -2180,6 +2186,7 @@ static int nvme_dev_add(struct nvme_dev *dev) res = nvme_identify(dev, 0, 1, dma_addr); if (res) { + dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res); res = -EIO; goto out; } -- cgit v0.10.2 From 94bbac4052eb93219ca0aa370ca741486b25fb98 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 Apr 2014 18:53:50 -0600 Subject: NVMe: Protect against badly formatted CQEs If a misbehaving device posts a CQE with a command id < depth but for one that was never allocated, the command info will have a callback function set to NULL and we don't want to try invoking that. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 074e982..b9f07f8 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -243,8 +243,9 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, void *ctx; struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); - if (cmdid >= nvmeq->q_depth) { - *fn = special_completion; + if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) { + if (fn) + *fn = special_completion; return CMD_CTX_INVALID; } if (fn) -- cgit v0.10.2 From 3291fa57cb1b004c1a4823beb28b5cc72555f1a5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 28 Apr 2014 12:30:52 -0600 Subject: NVMe: Add tracepoints Adding tracepoints for bio_complete and block_split into nvme to help with gathering IO info using blktrace and blkparse. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox diff --git a/block/blk-core.c b/block/blk-core.c index a0e3096..c488b55 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -43,6 +43,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index b9f07f8..025dd4c 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -42,6 +42,8 @@ #include #include +#include + #define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) @@ -411,6 +413,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_iod *iod = ctx; struct bio *bio = iod->private; u16 status = le16_to_cpup(&cqe->status) >> 1; + int error = 0; if (unlikely(status)) { if (!(status & NVME_SC_DNR || @@ -423,6 +426,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, wake_up(&nvmeq->sq_full); return; } + error = -EIO; } if (iod->nents) { dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents, @@ -430,10 +434,9 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, nvme_end_io_acct(bio, iod->start_time); } nvme_free_iod(nvmeq->dev, iod); - if (status) - bio_endio(bio, -EIO); - else - bio_endio(bio, 0); + + trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error); + bio_endio(bio, error); } /* length is in bytes. gfp flags indicates whether we may sleep. */ @@ -522,6 +525,8 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, if (!split) return -ENOMEM; + trace_block_split(bdev_get_queue(bio->bi_bdev), bio, + split->bi_iter.bi_sector); bio_chain(split, bio); if (!waitqueue_active(&nvmeq->sq_full)) -- cgit v0.10.2 From a7d2ce2832d84e0182585f63bf96ca7323b3aee7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Apr 2014 11:41:28 -0600 Subject: NVMe: Configure support for block flush This configures an nvme request_queue as flush capable if the device has a volatile write cache present. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 025dd4c..e7c4fdb 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1897,6 +1897,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); if (dev->max_hw_sectors) blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); + if (dev->vwc & NVME_CTRL_VWC_PRESENT) + blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); disk->major = nvme_major; disk->first_minor = 0; @@ -2201,6 +2203,7 @@ static int nvme_dev_add(struct nvme_dev *dev) nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; + dev->vwc = ctrl->vwc; memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index cfd084c..6266373 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -99,6 +99,7 @@ struct nvme_dev { u32 stripe_size; u16 oncs; u16 abort_limit; + u8 vwc; u8 initialized; }; diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index ad9014e..f090336 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -73,6 +73,7 @@ enum { NVME_CTRL_ONCS_COMPARE = 1 << 0, NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_VWC_PRESENT = 1 << 0, }; struct nvme_lbaf { -- cgit v0.10.2 From 53562be74bd06bbe74d2acf3caca5398f8eeb160 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Apr 2014 11:41:29 -0600 Subject: NVMe: Flush with data support It is possible a filesystem may send a flush flagged bio with write data. There is no such composite NVMe command, so the driver sends flush and write separately. The device is allowed to execute these commands in any order, so it was possible the driver ends the bio after the write completes, but while the flush is still active. We don't want to let a filesystem believe flush succeeded before it really has; this could cause data corruption on a power loss between these events. To fix, this patch splits the flush and write into chained bios. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e7c4fdb..cd8a8bc7 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -197,16 +197,13 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) -#define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE) -#define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE) +#define CMD_CTX_ABORT (0x318 + CMD_CTX_BASE) static void special_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { if (ctx == CMD_CTX_CANCELLED) return; - if (ctx == CMD_CTX_FLUSH) - return; if (ctx == CMD_CTX_ABORT) { ++nvmeq->dev->abort_limit; return; @@ -629,16 +626,6 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; } -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) -{ - int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH, - special_completion, NVME_IO_TIMEOUT); - if (unlikely(cmdid < 0)) - return cmdid; - - return nvme_submit_flush(nvmeq, ns, cmdid); -} - static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) { struct bio *bio = iod->private; @@ -654,7 +641,7 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) if (bio->bi_rw & REQ_DISCARD) return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); - if ((bio->bi_rw & REQ_FLUSH) && !iod->nents) + if (bio->bi_rw & REQ_FLUSH) return nvme_submit_flush(nvmeq, ns, cmdid); control = 0; @@ -688,6 +675,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) return 0; } +static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio) +{ + struct bio *split = bio_clone(bio, GFP_ATOMIC); + if (!split) + return -ENOMEM; + + split->bi_iter.bi_size = 0; + split->bi_phys_segments = 0; + bio->bi_rw &= ~REQ_FLUSH; + bio_chain(split, bio); + + if (!waitqueue_active(&nvmeq->sq_full)) + add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); + bio_list_add(&nvmeq->sq_cong, split); + bio_list_add(&nvmeq->sq_cong, bio); + wake_up_process(nvme_thread); + + return 0; +} + /* * Called with local interrupts disabled and the q_lock held. May not sleep. */ @@ -698,11 +705,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, int psegs = bio_phys_segments(ns->queue, bio); int result; - if ((bio->bi_rw & REQ_FLUSH) && psegs) { - result = nvme_submit_flush_data(nvmeq, ns); - if (result) - return result; - } + if ((bio->bi_rw & REQ_FLUSH) && psegs) + return nvme_split_flush_data(nvmeq, bio); iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC); if (!iod) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6266373..1813cfd 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -156,7 +156,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod); int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *); -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, u32 *result); int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, -- cgit v0.10.2 From f2727f7eb9132803d06309839a95de3dad82d237 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Wed, 7 May 2014 20:55:30 +0100 Subject: NVMe: Update namespace and controller identify structures to the 1.1a spec Controller: add CNTLID, AVSCC, APSTA, NVSCC, ACWU, SGLS fields. Namespace: add NMIC, RESCAP, EUI64 fields. EUI64 is specifically interesting, since it can be used to construct an UEFI NVMe device path for a boot entry. As per NVM Express 1.1a spec: http://www.nvmexpress.org/wp-content/uploads/NVM-Express-1_1a.pdf Signed-off-by: Dimitri John Ledkov Signed-off-by: Matthew Wilcox diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index f090336..e74da78 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -45,7 +45,8 @@ struct nvme_id_ctrl { __u8 ieee[3]; __u8 mic; __u8 mdts; - __u8 rsvd78[178]; + __u16 cntlid; + __u8 rsvd80[176]; __le16 oacs; __u8 acl; __u8 aerl; @@ -53,7 +54,9 @@ struct nvme_id_ctrl { __u8 lpa; __u8 elpe; __u8 npss; - __u8 rsvd264[248]; + __u8 avscc; + __u8 apsta; + __u8 rsvd266[246]; __u8 sqes; __u8 cqes; __u8 rsvd514[2]; @@ -64,7 +67,12 @@ struct nvme_id_ctrl { __u8 vwc; __le16 awun; __le16 awupf; - __u8 rsvd530[1518]; + __u8 nvscc; + __u8 rsvd531; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __u8 rsvd540[1508]; struct nvme_id_power_state psd[32]; __u8 vs[1024]; }; @@ -92,7 +100,10 @@ struct nvme_id_ns { __u8 mc; __u8 dpc; __u8 dps; - __u8 rsvd30[98]; + __u8 nmic; + __u8 rescap; + __u8 rsvd32[88]; + __u8 eui64[8]; struct nvme_lbaf lbaf[16]; __u8 rsvd192[192]; __u8 vs[3712]; -- cgit v0.10.2 From 21bd78bcf4208e84deab0d34f9d4e034d0580d0c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 9 May 2014 22:42:26 -0400 Subject: NVMe: Enable BUILD_BUG_ON checks Since _nvme_check_size() wasn't being called from anywhere, the compiler was optimising it away ... along with all the link-time build failures that would result if any of the structures were the wrong size. Call it from nvme_exit() for no particular reason. Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index cd8a8bc7..b821558 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2912,6 +2912,7 @@ static void __exit nvme_exit(void) unregister_blkdev(nvme_major, "nvme"); destroy_workqueue(nvme_workq); BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); + _nvme_check_size(); } MODULE_AUTHOR("Matthew Wilcox "); -- cgit v0.10.2 From 23372af15e638bf3ce0764554db3b5e58bf7ceb8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 9 May 2014 22:45:08 -0400 Subject: NVMe: Update data structures for NVMe 1.2 Include changes from the current set of ratified Technical Proposals for NVMe 1.2. Signed-off-by: Matthew Wilcox diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index e74da78..a6fb2a3 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -27,7 +27,12 @@ struct nvme_id_power_state { __u8 read_lat; __u8 write_tput; __u8 write_lat; - __u8 rsvd16[16]; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 active_work_scale; + __u8 rsvd23[9]; }; enum { @@ -46,7 +51,8 @@ struct nvme_id_ctrl { __u8 mic; __u8 mdts; __u16 cntlid; - __u8 rsvd80[176]; + __u32 ver; + __u8 rsvd84[172]; __le16 oacs; __u8 acl; __u8 aerl; @@ -56,7 +62,9 @@ struct nvme_id_ctrl { __u8 npss; __u8 avscc; __u8 apsta; - __u8 rsvd266[246]; + __le16 wctemp; + __le16 cctemp; + __u8 rsvd270[242]; __u8 sqes; __u8 cqes; __u8 rsvd514[2]; @@ -102,7 +110,12 @@ struct nvme_id_ns { __u8 dps; __u8 nmic; __u8 rescap; - __u8 rsvd32[88]; + __u8 fpi; + __u8 rsvd33; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __u8 rsvd40[80]; __u8 eui64[8]; struct nvme_lbaf lbaf[16]; __u8 rsvd192[192]; @@ -134,7 +147,10 @@ struct nvme_smart_log { __u8 unsafe_shutdowns[16]; __u8 media_errors[16]; __u8 num_err_log_entries[16]; - __u8 rsvd192[320]; + __le32 warning_temp_time; + __le32 critical_comp_time; + __le16 temp_sensor[8]; + __u8 rsvd216[296]; }; enum { -- cgit v0.10.2 From 4131f2fcdc5853d995044d4ed995a25ee4eb5ab2 Mon Sep 17 00:00:00 2001 From: Indraneel Mukherjee Date: Thu, 29 May 2014 12:02:03 +0530 Subject: NVMe: Fix the buffer size passed in GetLogPage(CDW10.NUMD) In GetLogPage the buffer size passed to device is a 0's based value. Signed-off-by: Indraneel M Reported-by: Shiro Itou Reviewed-by: Vishal Verma Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index da3b252..7ec61c4 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1014,8 +1014,8 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, c.common.opcode = nvme_admin_get_log_page; c.common.nsid = cpu_to_le32(0xFFFFFFFF); c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) - 1) << 16) | NVME_GET_SMART_LOG_PAGE); res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; @@ -1082,8 +1082,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.opcode = nvme_admin_get_log_page; c.common.nsid = cpu_to_le32(0xFFFFFFFF); c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) - 1) << 16) | NVME_GET_SMART_LOG_PAGE); res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; -- cgit v0.10.2 From 6808c5fb7fc1574c7608a38c9819f1639d89c3d0 Mon Sep 17 00:00:00 2001 From: Santosh Y Date: Thu, 29 May 2014 10:01:52 +0530 Subject: NVMe: Prevent possible NULL pointer dereference kmalloc() used by the nvme_alloc_iod() to allocate memory for 'iod' can fail. So check the return value. Signed-off-by: Santosh Y Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index b821558..872d8e4 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1488,7 +1488,11 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, goto put_pages; } + err = -ENOMEM; iod = nvme_alloc_iod(count, length, GFP_KERNEL); + if (!iod) + goto put_pages; + sg = iod->sg; sg_init_table(sg, count); for (i = 0; i < count; i++) { @@ -1501,7 +1505,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, sg_mark_end(&sg[i - 1]); iod->nents = count; - err = -ENOMEM; nents = dma_map_sg(&dev->pci_dev->dev, sg, count, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (!nents) -- cgit v0.10.2 From 61e4ce086df0a64a555880089e3b782517c828c0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 13 May 2014 11:42:01 -0600 Subject: NVMe: Make iod bio timeout a parameter This was originally set to 4 times the IO timeout, but that was when the IO timeout was 5 seconds instead of 30. 20 seconds for total time to failure seemed more reasonable than 2 minutes for most, but other users have requested to make this a module parameter instead. Signed-off-by: Keith Busch [renamed the module parameter to retry_time] [made retry_time static] Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 872d8e4..1a91106 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -48,12 +48,16 @@ #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) #define ADMIN_TIMEOUT (60 * HZ) -#define IOD_TIMEOUT (4 * NVME_IO_TIMEOUT) +#define IOD_TIMEOUT (retry_time * HZ) unsigned char io_timeout = 30; module_param(io_timeout, byte, 0644); MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); +static unsigned char retry_time = 30; +module_param(retry_time, byte, 0644); +MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O"); + static int nvme_major; module_param(nvme_major, int, 0); -- cgit v0.10.2 From 9d43cf646eab948db81913379dacb1dcecacb1eb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 13 May 2014 11:42:02 -0600 Subject: NVMe: Make admin timeout a module parameter Signed-off-by: Keith Busch [made admin_timeout static] Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 1a91106..12c57eb 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -44,11 +44,15 @@ #include -#define NVME_Q_DEPTH 1024 +#define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) -#define ADMIN_TIMEOUT (60 * HZ) -#define IOD_TIMEOUT (retry_time * HZ) +#define ADMIN_TIMEOUT (admin_timeout * HZ) +#define IOD_TIMEOUT (retry_time * HZ) + +static unsigned char admin_timeout = 60; +module_param(admin_timeout, byte, 0644); +MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); unsigned char io_timeout = 30; module_param(io_timeout, byte, 0644); -- cgit v0.10.2 From de672b9748f78dcbc663e12ea44cb24dc287baf0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 3 Jun 2014 17:06:26 -0400 Subject: NVMe: Delete NVME_GET_FEAT_TEMP_THRESH This define isn't used, and any code that wanted to use it should use NVME_FEAT_TEMP_THRESH instead. Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 7ec61c4..13403ee 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -240,7 +240,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ /* NVMe Namespace and Command Defines */ #define NVME_GET_SMART_LOG_PAGE 0x02 -#define NVME_GET_FEAT_TEMP_THRESH 0x04 #define BYTES_TO_DWORDS 4 #define NVME_MAX_FIRMWARE_SLOT 7 -- cgit v0.10.2 From a51afb54339c5e9ee72df66ae0f2ac5aacfed365 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 13 May 2014 10:32:46 -0600 Subject: NVMe: Fix nvme get/put queue semantics The routines to get and lock nvme queues required the caller to "put" or "unlock" them even if getting one returned NULL. This patch fixes that. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 12c57eb..29a3e85 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -285,9 +285,17 @@ static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid) static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU) { + struct nvme_queue *nvmeq; unsigned queue_id = get_cpu_var(*dev->io_queue); + rcu_read_lock(); - return rcu_dereference(dev->queues[queue_id]); + nvmeq = rcu_dereference(dev->queues[queue_id]); + if (nvmeq) + return nvmeq; + + rcu_read_unlock(); + put_cpu_var(*dev->io_queue); + return NULL; } static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) @@ -299,8 +307,15 @@ static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx) __acquires(RCU) { + struct nvme_queue *nvmeq; + rcu_read_lock(); - return rcu_dereference(dev->queues[q_idx]); + nvmeq = rcu_dereference(dev->queues[q_idx]); + if (nvmeq) + return nvmeq; + + rcu_read_unlock(); + return NULL; } static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) @@ -809,7 +824,6 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio) int result = -EBUSY; if (!nvmeq) { - put_nvmeq(NULL); bio_endio(bio, -EIO); return; } @@ -884,10 +898,8 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, struct nvme_queue *nvmeq; nvmeq = lock_nvmeq(dev, q_idx); - if (!nvmeq) { - unlock_nvmeq(nvmeq); + if (!nvmeq) return -ENODEV; - } cmdinfo.task = current; cmdinfo.status = -EINTR; @@ -912,9 +924,10 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, if (cmdinfo.status == -EINTR) { nvmeq = lock_nvmeq(dev, q_idx); - if (nvmeq) + if (nvmeq) { nvme_abort_command(nvmeq, cmdid); - unlock_nvmeq(nvmeq); + unlock_nvmeq(nvmeq); + } return -EINTR; } -- cgit v0.10.2 From b4e75cbf1364c4bbce3599c3279892a55b6ede07 Mon Sep 17 00:00:00 2001 From: Sam Bradshaw Date: Fri, 9 May 2014 13:27:07 -0700 Subject: NVMe: Adhere to request queue block accounting enable/disable Recently, a new sysfs control "iostats" was added to selectively enable or disable io statistics collection for request queues. This patch hooks that control. IO statistics collection is rather expensive on large, multi-node machines with drives pushing millions of iops. Having the ability to disable collection if not needed can improve throughput significantly. As a data point, on a quad E5-4640, I see more than 50% throughput improvement when io statistics accounting is disabled during heavily multi-threaded small block random read benchmarks where device performance is in the million iops+ range. Signed-off-by: Sam Bradshaw Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 29a3e85..bb6ce31 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -406,25 +406,30 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) static void nvme_start_io_acct(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; - const int rw = bio_data_dir(bio); - int cpu = part_stat_lock(); - part_round_stats(cpu, &disk->part0); - part_stat_inc(cpu, &disk->part0, ios[rw]); - part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio)); - part_inc_in_flight(&disk->part0, rw); - part_stat_unlock(); + if (blk_queue_io_stat(disk->queue)) { + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + part_round_stats(cpu, &disk->part0); + part_stat_inc(cpu, &disk->part0, ios[rw]); + part_stat_add(cpu, &disk->part0, sectors[rw], + bio_sectors(bio)); + part_inc_in_flight(&disk->part0, rw); + part_stat_unlock(); + } } static void nvme_end_io_acct(struct bio *bio, unsigned long start_time) { struct gendisk *disk = bio->bi_bdev->bd_disk; - const int rw = bio_data_dir(bio); - unsigned long duration = jiffies - start_time; - int cpu = part_stat_lock(); - part_stat_add(cpu, &disk->part0, ticks[rw], duration); - part_round_stats(cpu, &disk->part0); - part_dec_in_flight(&disk->part0, rw); - part_stat_unlock(); + if (blk_queue_io_stat(disk->queue)) { + const int rw = bio_data_dir(bio); + unsigned long duration = jiffies - start_time; + int cpu = part_stat_lock(); + part_stat_add(cpu, &disk->part0, ticks[rw], duration); + part_round_stats(cpu, &disk->part0); + part_dec_in_flight(&disk->part0, rw); + part_stat_unlock(); + } } static void bio_completion(struct nvme_queue *nvmeq, void *ctx, -- cgit v0.10.2 From dedf4b156134e0dedec18ebecda3e74077fa7c92 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Apr 2014 15:52:27 -0600 Subject: NVMe: Use last bytes of f/w rev SCSI Inquiry After skipping right-padded spaces, use the last four bytes of the firmware revision when reporting the Inquiry Product Revision. These are generally more indicative to what is running. Signed-off-by: Keith Busch Acked-by: Vishal Verma Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 13403ee..4fc25b9 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -681,6 +681,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, u8 resp_data_format = 0x02; u8 protect; u8 cmdque = 0x01 << 1; + u8 fw_offset = sizeof(dev->firmware_rev); mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); @@ -716,7 +717,11 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */ strncpy(&inq_response[8], "NVMe ", 8); strncpy(&inq_response[16], dev->model, 16); - strncpy(&inq_response[32], dev->firmware_rev, 4); + + while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4) + fw_offset--; + fw_offset -= 4; + strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4); xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); -- cgit v0.10.2 From bd67608a6127c994e897c49cc4f72d9095925301 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 3 Jun 2014 23:04:30 -0400 Subject: NVMe: Rename io_timeout to nvme_io_timeout It's positively immoral to have a global variable called 'io_timeout'. Keep the module parameter called io_timeout, though. Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index bb6ce31..2af079e 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -54,8 +54,8 @@ static unsigned char admin_timeout = 60; module_param(admin_timeout, byte, 0644); MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); -unsigned char io_timeout = 30; -module_param(io_timeout, byte, 0644); +unsigned char nvme_io_timeout = 30; +module_param_named(io_timeout, nvme_io_timeout, byte, 0644); MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); static unsigned char retry_time = 30; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 1813cfd..8541dd9 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -62,8 +62,8 @@ enum { #define NVME_VS(major, minor) (major << 16 | minor) -extern unsigned char io_timeout; -#define NVME_IO_TIMEOUT (io_timeout * HZ) +extern unsigned char nvme_io_timeout; +#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) /* * Represents an NVM Express device. Each nvme_dev is a PCI function. -- cgit v0.10.2 From f3db22feb5de6b98b7bae924c2d4b6c8d65bedae Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 11 Jun 2014 11:51:35 -0600 Subject: NVMe: Fix hot cpu notification dead lock There is a potential dead lock if a cpu event occurs during nvme probe since it registered with hot cpu notification. This fixes the race by having the module register with notification outside of probe rather than have each device register. The actual work is done in a scheduled work queue instead of in the notifier since assigning IO queues has the potential to block if the driver creates additional queues. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 2af079e..e0ac121 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -73,6 +73,7 @@ static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; static wait_queue_head_t nvme_kthread_wait; +static struct notifier_block nvme_nb; static void nvme_reset_failed_dev(struct work_struct *ws); @@ -2115,14 +2116,25 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); } +static void nvme_cpu_workfn(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work); + if (dev->initialized) + nvme_assign_io_queues(dev); +} + static int nvme_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - struct nvme_dev *dev = container_of(self, struct nvme_dev, nb); + struct nvme_dev *dev; + switch (action) { case CPU_ONLINE: case CPU_DEAD: - nvme_assign_io_queues(dev); + spin_lock(&dev_list_lock); + list_for_each_entry(dev, &dev_list, node) + schedule_work(&dev->cpu_work); + spin_unlock(&dev_list_lock); break; } return NOTIFY_OK; @@ -2191,11 +2203,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) nvme_free_queues(dev, nr_io_queues + 1); nvme_assign_io_queues(dev); - dev->nb.notifier_call = &nvme_cpu_notify; - result = register_hotcpu_notifier(&dev->nb); - if (result) - goto free_queues; - return 0; free_queues: @@ -2495,8 +2502,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) int i; dev->initialized = 0; - unregister_hotcpu_notifier(&dev->nb); - nvme_dev_list_remove(dev); if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { @@ -2767,6 +2772,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->namespaces); dev->reset_workfn = nvme_reset_failed_dev; INIT_WORK(&dev->reset_work, nvme_reset_workfn); + INIT_WORK(&dev->cpu_work, nvme_cpu_workfn); dev->pci_dev = pdev; pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -2836,6 +2842,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->reset_work); + flush_work(&dev->cpu_work); misc_deregister(&dev->miscdev); nvme_dev_remove(dev); nvme_dev_shutdown(dev); @@ -2923,11 +2930,18 @@ static int __init nvme_init(void) else if (result > 0) nvme_major = result; - result = pci_register_driver(&nvme_driver); + nvme_nb.notifier_call = &nvme_cpu_notify; + result = register_hotcpu_notifier(&nvme_nb); if (result) goto unregister_blkdev; + + result = pci_register_driver(&nvme_driver); + if (result) + goto unregister_hotcpu; return 0; + unregister_hotcpu: + unregister_hotcpu_notifier(&nvme_nb); unregister_blkdev: unregister_blkdev(nvme_major, "nvme"); kill_workq: @@ -2938,6 +2952,7 @@ static int __init nvme_init(void) static void __exit nvme_exit(void) { pci_unregister_driver(&nvme_driver); + unregister_hotcpu_notifier(&nvme_nb); unregister_blkdev(nvme_major, "nvme"); destroy_workqueue(nvme_workq); BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8541dd9..2bf4031 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -90,7 +90,7 @@ struct nvme_dev { struct miscdevice miscdev; work_func_t reset_workfn; struct work_struct reset_work; - struct notifier_block nb; + struct work_struct cpu_work; char name[12]; char serial[20]; char model[40]; -- cgit v0.10.2 From 3d69bb6e4699251102dc145b7800dd012ddec375 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 13 Jun 2014 10:53:49 -0400 Subject: NVMe: Define Log Page constants Taken from the 1.1a version of the spec Signed-off-by: Matthew Wilcox diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index a6fb2a3..29a7d86 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -306,6 +306,10 @@ enum { NVME_FEAT_WRITE_ATOMIC = 0x0a, NVME_FEAT_ASYNC_EVENT = 0x0b, NVME_FEAT_SW_PROGRESS = 0x0c, + NVME_LOG_ERROR = 0x01, + NVME_LOG_SMART = 0x02, + NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), NVME_FWACT_REPL_ACTV = (1 << 3), NVME_FWACT_ACTV = (2 << 3), -- cgit v0.10.2 From ef351b97dedaa7a6e257ed4f554718e384d8786b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 13 Jun 2014 10:54:21 -0400 Subject: NVMe: Use Log Page constants in SCSI emulation The nvme-scsi file defined its own Log Page constant. Use the newly-defined one from the header file instead. Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 4fc25b9..24308ae 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -239,7 +239,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define READ_CAP_16_RESP_SIZE 32 /* NVMe Namespace and Command Defines */ -#define NVME_GET_SMART_LOG_PAGE 0x02 #define BYTES_TO_DWORDS 4 #define NVME_MAX_FIRMWARE_SLOT 7 @@ -1019,7 +1018,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, c.common.nsid = cpu_to_le32(0xFFFFFFFF); c.common.prp1 = cpu_to_le64(dma_addr); c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) - 1) << 16) | NVME_GET_SMART_LOG_PAGE); + BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; @@ -1087,7 +1086,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.nsid = cpu_to_le32(0xFFFFFFFF); c.common.prp1 = cpu_to_le64(dma_addr); c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) - 1) << 16) | NVME_GET_SMART_LOG_PAGE); + BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; -- cgit v0.10.2 From b8e080847a7292347a3eee76264f77e4abcb61f7 Mon Sep 17 00:00:00 2001 From: Dan McLeran Date: Fri, 6 Jun 2014 08:27:27 -0600 Subject: NVMe: Fix START_STOP_UNIT Scsi->NVMe translation. This patch contains several fixes for Scsi START_STOP_UNIT. The previous code did not account for signed vs. unsigned arithmetic which resulted in an invalid lowest power state caculation when the device only supports 1 power state. The code for Power Condition == 2 (Idle) was not following the spec. The spec calls for setting the device to specific power states, depending upon Power Condition Modifier, without accounting for the number of power states supported by the device. The code for Power Condition == 3 (Standby) was using a hard-coded '0' which is replaced with the macro POWER_STATE_0. Signed-off-by: Dan McLeran Reviewed-by: Vishal Verma Signed-off-by: Matthew Wilcox diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 24308ae..a4cd6d6 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1476,7 +1476,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out_dma; } id_ctrl = mem; - lowest_pow_st = id_ctrl->npss - 1; + lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); switch (pc) { case NVME_POWER_STATE_START_VALID: @@ -1493,20 +1493,19 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, break; case NVME_POWER_STATE_IDLE: /* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */ - /* min of desired state and (lps-1) because lps is STOP */ if (pcmod == 0x0) - ps_desired = min(POWER_STATE_1, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_1; else if (pcmod == 0x1) - ps_desired = min(POWER_STATE_2, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_2; else if (pcmod == 0x2) - ps_desired = min(POWER_STATE_3, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_3; break; case NVME_POWER_STATE_STANDBY: /* Action unspecified if POWER CONDITION MODIFIER != [0,1] */ if (pcmod == 0x0) - ps_desired = max(0, (lowest_pow_st - 2)); + ps_desired = max(POWER_STATE_0, (lowest_pow_st - 2)); else if (pcmod == 0x1) - ps_desired = max(0, (lowest_pow_st - 1)); + ps_desired = max(POWER_STATE_0, (lowest_pow_st - 1)); break; case NVME_POWER_STATE_LU_CONTROL: default: -- cgit v0.10.2