Merge git://git.infradead.org/users/willy/linux-nvme

Pull NVMe driver updates from Matthew Wilcox:
 "Various updates to the NVMe driver.  The most user-visible change is
  that drive hotplugging now works and CPU hotplug while an NVMe drive
  is installed should also work better"

* git://git.infradead.org/users/willy/linux-nvme:
  NVMe: Retry failed commands with non-fatal errors
  NVMe: Add getgeo to block ops
  NVMe: Start-stop nvme_thread during device add-remove.
  NVMe: Make I/O timeout a module parameter
  NVMe: CPU hot plug notification
  NVMe: per-cpu io queues
  NVMe: Replace DEFINE_PCI_DEVICE_TABLE
  NVMe: Fix divide-by-zero in nvme_trans_io_get_num_cmds
  NVMe: IOCTL path RCU protect queue access
  NVMe: RCU protected access to io queues
  NVMe: Initialize device reference count earlier
  NVMe: Add CONFIG_PM_SLEEP to suspend/resume functions
This commit is contained in:
Linus Torvalds 2014-04-11 16:45:59 -07:00
commit 3e8072d48b
4 changed files with 504 additions and 245 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1562,13 +1562,14 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = PTR_ERR(iod); res = PTR_ERR(iod);
goto out; goto out;
} }
length = nvme_setup_prps(dev, &c.common, iod, tot_len, length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
GFP_KERNEL);
if (length != tot_len) { if (length != tot_len) {
res = -ENOMEM; res = -ENOMEM;
goto out_unmap; goto out_unmap;
} }
c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
} else if (opcode == nvme_admin_activate_fw) { } else if (opcode == nvme_admin_activate_fw) {
@@ -2033,7 +2034,6 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int res = SNTI_TRANSLATION_SUCCESS; int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc; int nvme_sc;
struct nvme_dev *dev = ns->dev; struct nvme_dev *dev = ns->dev;
struct nvme_queue *nvmeq;
u32 num_cmds; u32 num_cmds;
struct nvme_iod *iod; struct nvme_iod *iod;
u64 unit_len; u64 unit_len;
@@ -2045,7 +2045,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
struct nvme_command c; struct nvme_command c;
u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read); u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
u16 control; u16 control;
u32 max_blocks = nvme_block_nr(ns, dev->max_hw_sectors); u32 max_blocks = queue_max_hw_sectors(ns->queue);
num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks); num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
@@ -2093,8 +2093,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = PTR_ERR(iod); res = PTR_ERR(iod);
goto out; goto out;
} }
retcode = nvme_setup_prps(dev, &c.common, iod, unit_len, retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL);
GFP_KERNEL);
if (retcode != unit_len) { if (retcode != unit_len) {
nvme_unmap_user_pages(dev, nvme_unmap_user_pages(dev,
(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@ -2103,21 +2102,12 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = -ENOMEM; res = -ENOMEM;
goto out; goto out;
} }
c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
c.rw.prp2 = cpu_to_le64(iod->first_dma);
nvme_offset += unit_num_blocks; nvme_offset += unit_num_blocks;
nvmeq = get_nvmeq(dev); nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
/*
* Since nvme_submit_sync_cmd sleeps, we can't keep
* preemption disabled. We may be preempted at any
* point, and be rescheduled to a different CPU. That
* will cause cacheline bouncing, but no additional
* races since q_lock already protects against other
* CPUs.
*/
put_nvmeq(nvmeq);
nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL,
NVME_IO_TIMEOUT);
if (nvme_sc != NVME_SC_SUCCESS) { if (nvme_sc != NVME_SC_SUCCESS) {
nvme_unmap_user_pages(dev, nvme_unmap_user_pages(dev,
(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@ -2644,7 +2634,6 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
{ {
int res = SNTI_TRANSLATION_SUCCESS; int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc; int nvme_sc;
struct nvme_queue *nvmeq;
struct nvme_command c; struct nvme_command c;
u8 immed, pcmod, pc, no_flush, start; u8 immed, pcmod, pc, no_flush, start;
@@ -2671,10 +2660,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
c.common.opcode = nvme_cmd_flush; c.common.opcode = nvme_cmd_flush;
c.common.nsid = cpu_to_le32(ns->ns_id); c.common.nsid = cpu_to_le32(ns->ns_id);
nvmeq = get_nvmeq(ns->dev); nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
put_nvmeq(nvmeq);
nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
res = nvme_trans_status_code(hdr, nvme_sc); res = nvme_trans_status_code(hdr, nvme_sc);
if (res) if (res)
goto out; goto out;
@@ -2697,15 +2683,12 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
int res = SNTI_TRANSLATION_SUCCESS; int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc; int nvme_sc;
struct nvme_command c; struct nvme_command c;
struct nvme_queue *nvmeq;
memset(&c, 0, sizeof(c)); memset(&c, 0, sizeof(c));
c.common.opcode = nvme_cmd_flush; c.common.opcode = nvme_cmd_flush;
c.common.nsid = cpu_to_le32(ns->ns_id); c.common.nsid = cpu_to_le32(ns->ns_id);
nvmeq = get_nvmeq(ns->dev); nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
put_nvmeq(nvmeq);
nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
res = nvme_trans_status_code(hdr, nvme_sc); res = nvme_trans_status_code(hdr, nvme_sc);
if (res) if (res)
@@ -2872,7 +2855,6 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
struct nvme_dev *dev = ns->dev; struct nvme_dev *dev = ns->dev;
struct scsi_unmap_parm_list *plist; struct scsi_unmap_parm_list *plist;
struct nvme_dsm_range *range; struct nvme_dsm_range *range;
struct nvme_queue *nvmeq;
struct nvme_command c; struct nvme_command c;
int i, nvme_sc, res = -ENOMEM; int i, nvme_sc, res = -ENOMEM;
u16 ndesc, list_len; u16 ndesc, list_len;
@@ -2914,10 +2896,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.nr = cpu_to_le32(ndesc - 1);
c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
nvmeq = get_nvmeq(dev); nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
put_nvmeq(nvmeq);
nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
res = nvme_trans_status_code(hdr, nvme_sc); res = nvme_trans_status_code(hdr, nvme_sc);
dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),

View File

@@ -66,20 +66,25 @@ enum {
#define NVME_VS(major, minor) (major << 16 | minor) #define NVME_VS(major, minor) (major << 16 | minor)
#define NVME_IO_TIMEOUT (5 * HZ) extern unsigned char io_timeout;
#define NVME_IO_TIMEOUT (io_timeout * HZ)
/* /*
* Represents an NVM Express device. Each nvme_dev is a PCI function. * Represents an NVM Express device. Each nvme_dev is a PCI function.
*/ */
struct nvme_dev { struct nvme_dev {
struct list_head node; struct list_head node;
struct nvme_queue **queues; struct nvme_queue __rcu **queues;
unsigned short __percpu *io_queue;
u32 __iomem *dbs; u32 __iomem *dbs;
struct pci_dev *pci_dev; struct pci_dev *pci_dev;
struct dma_pool *prp_page_pool; struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool; struct dma_pool *prp_small_pool;
int instance; int instance;
int queue_count; unsigned queue_count;
unsigned online_queues;
unsigned max_qid;
int q_depth;
u32 db_stride; u32 db_stride;
u32 ctrl_config; u32 ctrl_config;
struct msix_entry *entry; struct msix_entry *entry;
@@ -89,6 +94,7 @@ struct nvme_dev {
struct miscdevice miscdev; struct miscdevice miscdev;
work_func_t reset_workfn; work_func_t reset_workfn;
struct work_struct reset_work; struct work_struct reset_work;
struct notifier_block nb;
char name[12]; char name[12];
char serial[20]; char serial[20];
char model[40]; char model[40];
@@ -131,6 +137,7 @@ struct nvme_iod {
int length; /* Of data, in bytes */ int length; /* Of data, in bytes */
unsigned long start_time; unsigned long start_time;
dma_addr_t first_dma; dma_addr_t first_dma;
struct list_head node;
struct scatterlist sg[0]; struct scatterlist sg[0];
}; };
@@ -146,16 +153,12 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
*/ */
void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod);
int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int , gfp_t);
struct nvme_iod *iod, int total_len, gfp_t gfp);
struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
unsigned long addr, unsigned length); unsigned long addr, unsigned length);
void nvme_unmap_user_pages(struct nvme_dev *dev, int write, void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
struct nvme_iod *iod); struct nvme_iod *iod);
struct nvme_queue *get_nvmeq(struct nvme_dev *dev); int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
void put_nvmeq(struct nvme_queue *nvmeq);
int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
u32 *result, unsigned timeout);
int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
u32 *result); u32 *result);

View File

@@ -434,6 +434,7 @@ enum {
NVME_SC_REFTAG_CHECK = 0x284, NVME_SC_REFTAG_CHECK = 0x284,
NVME_SC_COMPARE_FAILED = 0x285, NVME_SC_COMPARE_FAILED = 0x285,
NVME_SC_ACCESS_DENIED = 0x286, NVME_SC_ACCESS_DENIED = 0x286,
NVME_SC_DNR = 0x4000,
}; };
struct nvme_completion { struct nvme_completion {