diff options
Diffstat (limited to 'drivers')
216 files changed, 11656 insertions, 4685 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 3200060..92ed969 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -266,7 +266,8 @@ config ACPI_CUSTOM_DSDT default ACPI_CUSTOM_DSDT_FILE != "" config ACPI_INITRD_TABLE_OVERRIDE - bool "ACPI tables can be passed via uncompressed cpio in initrd" + bool "ACPI tables override via initrd" + depends on BLK_DEV_INITRD && X86 default n help This option provides functionality to override arbitrary ACPI tables diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7ae2750..d668a8a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -48,8 +48,8 @@ #include <linux/genalloc.h> #include <linux/pci.h> #include <linux/aer.h> -#include <acpi/apei.h> -#include <acpi/hed.h> + +#include <acpi/ghes.h> #include <asm/mce.h> #include <asm/tlbflush.h> #include <asm/nmi.h> @@ -84,42 +84,6 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) -/* - * One struct ghes is created for each generic hardware error source. - * It provides the context for APEI hardware error timer/IRQ/SCI/NMI - * handler. - * - * estatus: memory buffer for error status block, allocated during - * HEST parsing. - */ -#define GHES_TO_CLEAR 0x0001 -#define GHES_EXITING 0x0002 - -struct ghes { - struct acpi_hest_generic *generic; - struct acpi_hest_generic_status *estatus; - u64 buffer_paddr; - unsigned long flags; - union { - struct list_head list; - struct timer_list timer; - unsigned int irq; - }; -}; - -struct ghes_estatus_node { - struct llist_node llnode; - struct acpi_hest_generic *generic; -}; - -struct ghes_estatus_cache { - u32 estatus_len; - atomic_t count; - struct acpi_hest_generic *generic; - unsigned long long time_in; - struct rcu_head rcu; -}; - bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -333,13 +297,6 @@ static void ghes_fini(struct ghes *ghes) apei_unmap_generic_address(&ghes->generic->error_status_address); } -enum { - GHES_SEV_NO = 0x0, - GHES_SEV_CORRECTED = 0x1, - GHES_SEV_RECOVERABLE = 0x2, - GHES_SEV_PANIC = 0x3, -}; - static inline int ghes_severity(int severity) { switch (severity) { @@ -452,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) +static void ghes_do_proc(struct ghes *ghes, + const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; struct acpi_hest_generic_data *gdata; @@ -464,6 +422,8 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err; mem_err = (struct cper_sec_mem_err *)(gdata+1); + ghes_edac_report_mem_error(ghes, sev, mem_err); + #ifdef CONFIG_X86_MCE apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, mem_err); @@ -682,7 +642,7 @@ static int ghes_proc(struct ghes *ghes) if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } - ghes_do_proc(ghes->estatus); + ghes_do_proc(ghes, ghes->estatus); out: ghes_clear_estatus(ghes); return 0; @@ -775,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = apei_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); - ghes_do_proc(estatus); + ghes_do_proc(estatus_node->ghes, estatus); if (!ghes_estatus_cached(estatus)) { generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) @@ -864,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); if (estatus_node) { + estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); memcpy(estatus, ghes->estatus, len); @@ -942,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev) ghes = NULL; goto err; } + + rc = ghes_edac_register(ghes, &ghes_dev->dev); + if (rc < 0) + goto err; + switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED: ghes->timer.function = ghes_poll_func; @@ -954,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev) if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) { pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } if (request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes)) { pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } break; case ACPI_HEST_NOTIFY_SCI: @@ -986,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev) platform_set_drvdata(ghes_dev, ghes); return 0; +err_edac_unreg: + ghes_edac_unregister(ghes); err: if (ghes) { ghes_fini(ghes); @@ -1038,6 +1006,9 @@ static int ghes_remove(struct platform_device *ghes_dev) } ghes_fini(ghes); + + ghes_edac_unregister(ghes); + kfree(ghes); platform_set_drvdata(ghes_dev, NULL); diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index b22d71c..0e3f8f9 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -157,7 +157,6 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) { struct atm_cirange ci; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; int i; @@ -171,7 +170,7 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) for(i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev != dev) continue; @@ -264,12 +263,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) { struct hlist_head *head; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == dev && vcc->vci == vci && vcc->vpi == vpi && diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index c1eb6fa..b1955ba 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2093,7 +2093,6 @@ static unsigned char eni_phy_get(struct atm_dev *dev,unsigned long addr) static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page) { - struct hlist_node *node; struct sock *s; static const char *signal[] = { "LOST","unknown","okay" }; struct eni_dev *eni_dev = ENI_DEV(dev); @@ -2171,7 +2170,7 @@ static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page) for(i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { struct eni_vcc *eni_vcc; int length; diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 72b6960..d689126 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -329,7 +329,6 @@ __find_vcc(struct he_dev *he_dev, unsigned cid) { struct hlist_head *head; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; short vpi; int vci; @@ -338,7 +337,7 @@ __find_vcc(struct he_dev *he_dev, unsigned cid) vci = cid & ((1 << he_dev->vcibits) - 1); head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == he_dev->atm_dev && vcc->vci == vci && vcc->vpi == vpi && diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index ed1d2b7..6587dc2 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -251,7 +251,6 @@ static void nicstar_remove_one(struct pci_dev *pcidev) if (card->scd2vc[j] != NULL) free_scq(card, card->scd2vc[j]->scq, card->scd2vc[j]->tx_vcc); } - idr_remove_all(&card->idr); idr_destroy(&card->idr); pci_free_consistent(card->pcidev, NS_RSQSIZE + NS_RSQ_ALIGNMENT, card->rsq.org, card->rsq.dma); @@ -950,11 +949,10 @@ static void free_scq(ns_dev *card, scq_info *scq, struct atm_vcc *vcc) static void push_rxbufs(ns_dev * card, struct sk_buff *skb) { struct sk_buff *handle1, *handle2; - u32 id1 = 0, id2 = 0; + int id1, id2; u32 addr1, addr2; u32 stat; unsigned long flags; - int err; /* *BARF* */ handle2 = NULL; @@ -1027,23 +1025,12 @@ static void push_rxbufs(ns_dev * card, struct sk_buff *skb) card->lbfqc += 2; } - do { - if (!idr_pre_get(&card->idr, GFP_ATOMIC)) { - printk(KERN_ERR - "nicstar%d: no free memory for idr\n", - card->index); - goto out; - } - - if (!id1) - err = idr_get_new_above(&card->idr, handle1, 0, &id1); - - if (!id2 && err == 0) - err = idr_get_new_above(&card->idr, handle2, 0, &id2); - - } while (err == -EAGAIN); + id1 = idr_alloc(&card->idr, handle1, 0, 0, GFP_ATOMIC); + if (id1 < 0) + goto out; - if (err) + id2 = idr_alloc(&card->idr, handle2, 0, 0, GFP_ATOMIC); + if (id2 < 0) goto out; spin_lock_irqsave(&card->res_lock, flags); diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 0474a89..32784d1 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -896,12 +896,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) { struct hlist_head *head; struct atm_vcc *vcc = NULL; - struct hlist_node *node; struct sock *s; read_lock(&vcc_sklist_lock); head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == dev && vcc->vci == vci && vcc->vpi == vpi && vcc->qos.rxtp.traffic_class != ATM_NONE && diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index ff5b745..2a7cb0d 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -39,6 +39,8 @@ static int dma_buf_release(struct inode *inode, struct file *file) dmabuf = file->private_data; + BUG_ON(dmabuf->vmapping_counter); + dmabuf->ops->release(dmabuf); kfree(dmabuf); return 0; @@ -445,6 +447,9 @@ EXPORT_SYMBOL_GPL(dma_buf_kunmap); int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, unsigned long pgoff) { + struct file *oldfile; + int ret; + if (WARN_ON(!dmabuf || !vma)) return -EINVAL; @@ -458,14 +463,22 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, return -EINVAL; /* readjust the vma */ - if (vma->vm_file) - fput(vma->vm_file); - - vma->vm_file = get_file(dmabuf->file); - + get_file(dmabuf->file); + oldfile = vma->vm_file; + vma->vm_file = dmabuf->file; vma->vm_pgoff = pgoff; - return dmabuf->ops->mmap(dmabuf, vma); + ret = dmabuf->ops->mmap(dmabuf, vma); + if (ret) { + /* restore old parameters on failure */ + vma->vm_file = oldfile; + fput(dmabuf->file); + } else { + if (oldfile) + fput(oldfile); + } + return ret; + } EXPORT_SYMBOL_GPL(dma_buf_mmap); @@ -481,12 +494,34 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap); */ void *dma_buf_vmap(struct dma_buf *dmabuf) { + void *ptr; + if (WARN_ON(!dmabuf)) return NULL; - if (dmabuf->ops->vmap) - return dmabuf->ops->vmap(dmabuf); - return NULL; + if (!dmabuf->ops->vmap) + return NULL; + + mutex_lock(&dmabuf->lock); + if (dmabuf->vmapping_counter) { + dmabuf->vmapping_counter++; + BUG_ON(!dmabuf->vmap_ptr); + ptr = dmabuf->vmap_ptr; + goto out_unlock; + } + + BUG_ON(dmabuf->vmap_ptr); + + ptr = dmabuf->ops->vmap(dmabuf); + if (IS_ERR_OR_NULL(ptr)) + goto out_unlock; + + dmabuf->vmap_ptr = ptr; + dmabuf->vmapping_counter = 1; + +out_unlock: + mutex_unlock(&dmabuf->lock); + return ptr; } EXPORT_SYMBOL_GPL(dma_buf_vmap); @@ -500,7 +535,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) if (WARN_ON(!dmabuf)) return; - if (dmabuf->ops->vunmap) - dmabuf->ops->vunmap(dmabuf, vaddr); + BUG_ON(!dmabuf->vmap_ptr); + BUG_ON(dmabuf->vmapping_counter == 0); + BUG_ON(dmabuf->vmap_ptr != vaddr); + + mutex_lock(&dmabuf->lock); + if (--dmabuf->vmapping_counter == 0) { + if (dmabuf->ops->vunmap) + dmabuf->ops->vunmap(dmabuf, vaddr); + dmabuf->vmap_ptr = NULL; + } + mutex_unlock(&dmabuf->lock); } EXPORT_SYMBOL_GPL(dma_buf_vunmap); diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 8f12dc7..5b5ee79 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -7054,6 +7054,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, else ErrorCode = 0; } + break; default: ErrorCode = -ENOTTY; } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 824e09c..5dc0dae 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -63,19 +63,6 @@ config AMIGA_Z2RAM To compile this driver as a module, choose M here: the module will be called z2ram. -config BLK_DEV_XD - tristate "XT hard disk support" - depends on ISA && ISA_DMA_API - select CHECK_SIGNATURE - help - Very old 8 bit hard disk controllers used in the IBM XT computer - will be supported if you say Y here. - - To compile this driver as a module, choose M here: the - module will be called xd. - - It's pretty unlikely that you have one of these: say N. - config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST @@ -544,4 +531,14 @@ config BLK_DEV_RBD If unsure, say N. +config BLK_DEV_RSXX + tristate "RamSam PCIe Flash SSD Device Driver" + depends on PCI + help + Device driver for IBM's high speed PCIe SSD + storage devices: RamSan-70 and RamSan-80. + + To compile this driver as a module, choose M here: the + module will be called rsxx. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 17e82df..a3b4023 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o -obj-$(CONFIG_BLK_DEV_XD) += xd.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o @@ -41,4 +40,6 @@ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ + swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8c13eeb..e98da67 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2660,25 +2660,24 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; - if (!idr_pre_get(&minors, GFP_KERNEL)) - goto out_no_minor_idr; - if (idr_get_new_above(&minors, mdev, minor, &minor_got)) + minor_got = idr_alloc(&minors, mdev, minor, minor + 1, GFP_KERNEL); + if (minor_got < 0) { + if (minor_got == -ENOSPC) { + err = ERR_MINOR_EXISTS; + drbd_msg_put_info("requested minor exists already"); + } goto out_no_minor_idr; - if (minor_got != minor) { - err = ERR_MINOR_EXISTS; - drbd_msg_put_info("requested minor exists already"); - goto out_idr_remove_minor; } - if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) - goto out_idr_remove_minor; - if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got)) + vnr_got = idr_alloc(&tconn->volumes, mdev, vnr, vnr + 1, GFP_KERNEL); + if (vnr_got < 0) { + if (vnr_got == -ENOSPC) { + err = ERR_INVALID_REQUEST; + drbd_msg_put_info("requested volume exists already"); + } goto out_idr_remove_minor; - if (vnr_got != vnr) { - err = ERR_INVALID_REQUEST; - drbd_msg_put_info("requested volume exists already"); - goto out_idr_remove_vol; } + add_disk(disk); kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */ @@ -2689,8 +2688,6 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, return NO_ERROR; -out_idr_remove_vol: - idr_remove(&tconn->volumes, vnr_got); out_idr_remove_minor: idr_remove(&minors, minor_got); synchronize_rcu(); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8031a8c..747bb2a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -162,12 +162,13 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) { - loff_t size, loopsize; + loff_t loopsize; /* Compute loopsize in bytes */ - size = i_size_read(file->f_mapping->host); - loopsize = size - offset; - /* offset is beyond i_size, wierd but possible */ + loopsize = i_size_read(file->f_mapping->host); + if (offset > 0) + loopsize -= offset; + /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; @@ -190,6 +191,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; + struct block_device *bdev = lo->lo_device; if (unlikely((loff_t)x != size)) return -EFBIG; @@ -198,6 +200,9 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; set_capacity(lo->lo_disk, x); + bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); + /* let user-space know about the new size */ + kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); return 0; } @@ -1091,10 +1096,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) return err; if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { + lo->lo_sizelimit != info->lo_sizelimit) if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) return -EFBIG; - } + loop_config_discard(lo); memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); @@ -1271,28 +1276,10 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) { - int err; - sector_t sec; - loff_t sz; - - err = -ENXIO; if (unlikely(lo->lo_state != Lo_bound)) - goto out; - err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); - if (unlikely(err)) - goto out; - sec = get_capacity(lo->lo_disk); - /* the width of sector_t may be narrow for bit-shift */ - sz = sec; - sz <<= 9; - mutex_lock(&bdev->bd_mutex); - bd_set_size(bdev, sz); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); - mutex_unlock(&bdev->bd_mutex); + return -ENXIO; - out: - return err; + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int lo_ioctl(struct block_device *bdev, fmode_t mode, @@ -1624,30 +1611,17 @@ static int loop_add(struct loop_device **l, int i) if (!lo) goto out; - if (!idr_pre_get(&loop_index_idr, GFP_KERNEL)) - goto out_free_dev; - + /* allocate id, if @id >= 0, we're requesting that specific id */ if (i >= 0) { - int m; - - /* create specific i in the index */ - err = idr_get_new_above(&loop_index_idr, lo, i, &m); - if (err >= 0 && i != m) { - idr_remove(&loop_index_idr, m); + err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); + if (err == -ENOSPC) err = -EEXIST; - } - } else if (i == -1) { - int m; - - /* get next free nr */ - err = idr_get_new(&loop_index_idr, lo, &m); - if (err >= 0) - i = m; } else { - err = -EINVAL; + err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); } if (err < 0) goto out_free_dev; + i = err; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) @@ -1858,11 +1832,15 @@ static int __init loop_init(void) max_part = (1UL << part_shift) - 1; } - if ((1UL << part_shift) > DISK_MAX_PARTS) - return -EINVAL; + if ((1UL << part_shift) > DISK_MAX_PARTS) { + err = -EINVAL; + goto misc_out; + } - if (max_loop > 1UL << (MINORBITS - part_shift)) - return -EINVAL; + if (max_loop > 1UL << (MINORBITS - part_shift)) { + err = -EINVAL; + goto misc_out; + } /* * If max_loop is specified, create that many devices upfront. @@ -1880,8 +1858,10 @@ static int __init loop_init(void) range = 1UL << MINORBITS; } - if (register_blkdev(LOOP_MAJOR, "loop")) - return -EIO; + if (register_blkdev(LOOP_MAJOR, "loop")) { + err = -EIO; + goto misc_out; + } blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1894,6 +1874,10 @@ static int __init loop_init(void) printk(KERN_INFO "loop: module loaded\n"); return 0; + +misc_out: + misc_deregister(&loop_misc); + return err; } static int loop_exit_cb(int id, void *ptr, void *data) @@ -1911,7 +1895,6 @@ static void __exit loop_exit(void) range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); - idr_remove_all(&loop_index_idr); idr_destroy(&loop_index_idr); blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig index 0ba837f..1fca1f99 100644 --- a/drivers/block/mtip32xx/Kconfig +++ b/drivers/block/mtip32xx/Kconfig @@ -4,6 +4,6 @@ config BLK_DEV_PCIESSD_MTIP32XX tristate "Block Device Driver for Micron PCIe SSDs" - depends on PCI + depends on PCI && GENERIC_HARDIRQS help This enables the block driver for Micron PCIe SSDs. diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3fd1009..11cc952 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -88,6 +88,8 @@ static int instance; static int mtip_major; static struct dentry *dfs_parent; +static u32 cpu_use[NR_CPUS]; + static DEFINE_SPINLOCK(rssd_index_lock); static DEFINE_IDA(rssd_index_ida); @@ -296,16 +298,17 @@ static int hba_reset_nosleep(struct driver_data *dd) */ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) { - atomic_set(&port->commands[tag].active, 1); + int group = tag >> 5; - spin_lock(&port->cmd_issue_lock); + atomic_set(&port->commands[tag].active, 1); + /* guard SACT and CI registers */ + spin_lock(&port->cmd_issue_lock[group]); writel((1 << MTIP_TAG_BIT(tag)), port->s_active[MTIP_TAG_INDEX(tag)]); writel((1 << MTIP_TAG_BIT(tag)), port->cmd_issue[MTIP_TAG_INDEX(tag)]); - - spin_unlock(&port->cmd_issue_lock); + spin_unlock(&port->cmd_issue_lock[group]); /* Set the command's timeout value.*/ port->commands[tag].comp_time = jiffies + msecs_to_jiffies( @@ -964,56 +967,56 @@ handle_tfe_exit: /* * Handle a set device bits interrupt */ -static inline void mtip_process_sdbf(struct driver_data *dd) +static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, + u32 completed) { - struct mtip_port *port = dd->port; - int group, tag, bit; - u32 completed; + struct driver_data *dd = port->dd; + int tag, bit; struct mtip_cmd *command; - /* walk all bits in all slot groups */ - for (group = 0; group < dd->slot_groups; group++) { - completed = readl(port->completed[group]); - if (!completed) - continue; + if (!completed) { + WARN_ON_ONCE(!completed); + return; + } + /* clear completed status register in the hardware.*/ + writel(completed, port->completed[group]); - /* clear completed status register in the hardware.*/ - writel(completed, port->completed[group]); + /* Process completed commands. */ + for (bit = 0; (bit < 32) && completed; bit++) { + if (completed & 0x01) { + tag = (group << 5) | bit; - /* Process completed commands. */ - for (bit = 0; - (bit < 32) && completed; - bit++, completed >>= 1) { - if (completed & 0x01) { - tag = (group << 5) | bit; + /* skip internal command slot. */ + if (unlikely(tag == MTIP_TAG_INTERNAL)) + continue; - /* skip internal command slot. */ - if (unlikely(tag == MTIP_TAG_INTERNAL)) - continue; + command = &port->commands[tag]; + /* make internal callback */ + if (likely(command->comp_func)) { + command->comp_func( + port, + tag, + command->comp_data, + 0); + } else { + dev_warn(&dd->pdev->dev, + "Null completion " + "for tag %d", + tag); - command = &port->commands[tag]; - /* make internal callback */ - if (likely(command->comp_func)) { - command->comp_func( - port, - tag, - command->comp_data, - 0); - } else { - dev_warn(&dd->pdev->dev, - "Null completion " - "for tag %d", - tag); - - if (mtip_check_surprise_removal( - dd->pdev)) { - mtip_command_cleanup(dd); - return; - } + if (mtip_check_surprise_removal( + dd->pdev)) { + mtip_command_cleanup(dd); + return; } } } + completed >>= 1; } + + /* If last, re-enable interrupts */ + if (atomic_dec_return(&dd->irq_workers_active) == 0) + writel(0xffffffff, dd->mmio + HOST_IRQ_STAT); } /* @@ -1072,6 +1075,8 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) struct mtip_port *port = dd->port; u32 hba_stat, port_stat; int rv = IRQ_NONE; + int do_irq_enable = 1, i, workers; + struct mtip_work *twork; hba_stat = readl(dd->mmio + HOST_IRQ_STAT); if (hba_stat) { @@ -1082,8 +1087,42 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) writel(port_stat, port->mmio + PORT_IRQ_STAT); /* Demux port status */ - if (likely(port_stat & PORT_IRQ_SDB_FIS)) - mtip_process_sdbf(dd); + if (likely(port_stat & PORT_IRQ_SDB_FIS)) { + do_irq_enable = 0; + WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0); + + /* Start at 1: group zero is always local? */ + for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS; + i++) { + twork = &dd->work[i]; + twork->completed = readl(port->completed[i]); + if (twork->completed) + workers++; + } + + atomic_set(&dd->irq_workers_active, workers); + if (workers) { + for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) { + twork = &dd->work[i]; + if (twork->completed) + queue_work_on( + twork->cpu_binding, + dd->isr_workq, + &twork->work); + } + + if (likely(dd->work[0].completed)) + mtip_workq_sdbfx(port, 0, + dd->work[0].completed); + + } else { + /* + * Chip quirk: SDB interrupt but nothing + * to complete + */ + do_irq_enable = 1; + } + } if (unlikely(port_stat & PORT_IRQ_ERR)) { if (unlikely(mtip_check_surprise_removal(dd->pdev))) { @@ -1103,21 +1142,13 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) } /* acknowledge interrupt */ - writel(hba_stat, dd->mmio + HOST_IRQ_STAT); + if (unlikely(do_irq_enable)) + writel(hba_stat, dd->mmio + HOST_IRQ_STAT); return rv; } /* - * Wrapper for mtip_handle_irq - * (ignores return code) - */ -static void mtip_tasklet(unsigned long data) -{ - mtip_handle_irq((struct driver_data *) data); -} - -/* * HBA interrupt subroutine. * * @irq IRQ number. @@ -1130,8 +1161,8 @@ static void mtip_tasklet(unsigned long data) static irqreturn_t mtip_irq_handler(int irq, void *instance) { struct driver_data *dd = instance; - tasklet_schedule(&dd->tasklet); - return IRQ_HANDLED; + + return mtip_handle_irq(dd); } static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) @@ -1489,6 +1520,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) } #endif + /* Demux ID.DRAT & ID.RZAT to determine trim support */ + if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) + port->dd->trim_supp = true; + else + port->dd->trim_supp = false; + /* Set the identify buffer as valid. */ port->identify_valid = 1; @@ -1676,6 +1713,81 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, } /* + * Trim unused sectors + * + * @dd pointer to driver_data structure + * @lba starting lba + * @len # of 512b sectors to trim + * + * return value + * -ENOMEM Out of dma memory + * -EINVAL Invalid parameters passed in, trim not supported + * -EIO Error submitting trim request to hw + */ +static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len) +{ + int i, rv = 0; + u64 tlba, tlen, sect_left; + struct mtip_trim_entry *buf; + dma_addr_t dma_addr; + struct host_to_dev_fis fis; + + if (!len || dd->trim_supp == false) + return -EINVAL; + + /* Trim request too big */ + WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES)); + + /* Trim request not aligned on 4k boundary */ + WARN_ON(len % 8 != 0); + + /* Warn if vu_trim structure is too big */ + WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE); + + /* Allocate a DMA buffer for the trim structure */ + buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + memset(buf, 0, ATA_SECT_SIZE); + + for (i = 0, sect_left = len, tlba = lba; + i < MTIP_MAX_TRIM_ENTRIES && sect_left; + i++) { + tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ? + MTIP_MAX_TRIM_ENTRY_LEN : + sect_left); + buf[i].lba = __force_bit2int cpu_to_le32(tlba); + buf[i].range = __force_bit2int cpu_to_le16(tlen); + tlba += tlen; + sect_left -= tlen; + } + WARN_ON(sect_left != 0); + + /* Build the fis */ + memset(&fis, 0, sizeof(struct host_to_dev_fis)); + fis.type = 0x27; + fis.opts = 1 << 7; + fis.command = 0xfb; + fis.features = 0x60; + fis.sect_count = 1; + fis.device = ATA_DEVICE_OBS; + + if (mtip_exec_internal_command(dd->port, + &fis, + 5, + dma_addr, + ATA_SECT_SIZE, + 0, + GFP_KERNEL, + MTIP_TRIM_TIMEOUT_MS) < 0) + rv = -EIO; + + dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr); + return rv; +} + +/* * Get the drive capacity. * * @dd Pointer to the device data structure. @@ -3005,20 +3117,24 @@ static int mtip_hw_init(struct driver_data *dd) hba_setup(dd); - tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd); - - dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL); + dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL, + dd->numa_node); if (!dd->port) { dev_err(&dd->pdev->dev, "Memory allocation: port structure\n"); return -ENOMEM; } + /* Continue workqueue setup */ + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + dd->work[i].port = dd->port; + /* Counting semaphore to track command slot usage */ sema_init(&dd->port->cmd_slot, num_command_slots - 1); /* Spinlock to prevent concurrent issue */ - spin_lock_init(&dd->port->cmd_issue_lock); + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + spin_lock_init(&dd->port->cmd_issue_lock[i]); /* Set the port mmio base address. */ dd->port->mmio = dd->mmio + PORT_OFFSET; @@ -3165,6 +3281,7 @@ static int mtip_hw_init(struct driver_data *dd) "Unable to allocate IRQ %d\n", dd->pdev->irq); goto out2; } + irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding)); /* Enable interrupts on the HBA. */ writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, @@ -3241,7 +3358,8 @@ out3: writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, dd->mmio + HOST_CTL); - /*Release the IRQ. */ + /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); out2: @@ -3291,11 +3409,9 @@ static int mtip_hw_exit(struct driver_data *dd) del_timer_sync(&dd->port->cmd_timer); /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); - /* Stop the bottom half tasklet. */ - tasklet_kill(&dd->tasklet); - /* Free the command/command header memory. */ dmam_free_coherent(&dd->pdev->dev, HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), @@ -3641,6 +3757,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) } } + if (unlikely(bio->bi_rw & REQ_DISCARD)) { + bio_endio(bio, mtip_send_trim(dd, bio->bi_sector, + bio_sectors(bio))); + return; + } + if (unlikely(!bio_has_data(bio))) { blk_queue_flush(queue, 0); bio_endio(bio, 0); @@ -3711,7 +3833,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto protocol_init_error; } - dd->disk = alloc_disk(MTIP_MAX_MINORS); + dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node); if (dd->disk == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate gendisk structure\n"); @@ -3755,7 +3877,7 @@ static int mtip_block_initialize(struct driver_data *dd) skip_create_disk: /* Allocate the request queue. */ - dd->queue = blk_alloc_queue(GFP_KERNEL); + dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node); if (dd->queue == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); @@ -3783,6 +3905,15 @@ skip_create_disk: */ blk_queue_flush(dd->queue, 0); + /* Signal trim support */ + if (dd->trim_supp == true) { + set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); + dd->queue->limits.discard_granularity = 4096; + blk_queue_max_discard_sectors(dd->queue, + MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); + dd->queue->limits.discard_zeroes_data = 0; + } + /* Set the capacity of the device in 512 byte sectors. */ if (!(mtip_hw_get_capacity(dd, &capacity))) { dev_warn(&dd->pdev->dev, @@ -3813,9 +3944,8 @@ skip_create_disk: start_service_thread: sprintf(thd_name, "mtip_svc_thd_%02d", index); - - dd->mtip_svc_handler = kthread_run(mtip_service_thread, - dd, thd_name); + dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, + dd, dd->numa_node, thd_name); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); @@ -3823,7 +3953,7 @@ start_service_thread: rv = -EFAULT; goto kthread_run_error; } - + wake_up_process(dd->mtip_svc_handler); if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) rv = wait_for_rebuild; @@ -3963,6 +4093,56 @@ static int mtip_block_resume(struct driver_data *dd) return 0; } +static void drop_cpu(int cpu) +{ + cpu_use[cpu]--; +} + +static int get_least_used_cpu_on_node(int node) +{ + int cpu, least_used_cpu, least_cnt; + const struct cpumask *node_mask; + + node_mask = cpumask_of_node(node); + least_used_cpu = cpumask_first(node_mask); + least_cnt = cpu_use[least_used_cpu]; + cpu = least_used_cpu; + + for_each_cpu(cpu, node_mask) { + if (cpu_use[cpu] < least_cnt) { + least_used_cpu = cpu; + least_cnt = cpu_use[cpu]; + } + } + cpu_use[least_used_cpu]++; + return least_used_cpu; +} + +/* Helper for selecting a node in round robin mode */ +static inline int mtip_get_next_rr_node(void) +{ + static int next_node = -1; + + if (next_node == -1) { + next_node = first_online_node; + return next_node; + } + + next_node = next_online_node(next_node); + if (next_node == MAX_NUMNODES) + next_node = first_online_node; + return next_node; +} + +static DEFINE_HANDLER(0); +static DEFINE_HANDLER(1); +static DEFINE_HANDLER(2); +static DEFINE_HANDLER(3); +static DEFINE_HANDLER(4); +static DEFINE_HANDLER(5); +static DEFINE_HANDLER(6); +static DEFINE_HANDLER(7); + /* * Called for each supported PCI device detected. * @@ -3977,9 +4157,25 @@ static int mtip_pci_probe(struct pci_dev *pdev, { int rv = 0; struct driver_data *dd = NULL; + char cpu_list[256]; + const struct cpumask *node_mask; + int cpu, i = 0, j = 0; + int my_node = NUMA_NO_NODE; /* Allocate memory for this devices private data. */ - dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL); + my_node = pcibus_to_node(pdev->bus); + if (my_node != NUMA_NO_NODE) { + if (!node_online(my_node)) + my_node = mtip_get_next_rr_node(); + } else { + dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n"); + my_node = mtip_get_next_rr_node(); + } + dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", + my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), + cpu_to_node(smp_processor_id()), smp_processor_id()); + + dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { dev_err(&pdev->dev, "Unable to allocate memory for driver data\n"); @@ -4016,19 +4212,82 @@ static int mtip_pci_probe(struct pci_dev *pdev, } } - pci_set_master(pdev); + /* Copy the info we may need later into the private data structure. */ + dd->major = mtip_major; + dd->instance = instance; + dd->pdev = pdev; + dd->numa_node = my_node; + memset(dd->workq_name, 0, 32); + snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); + + dd->isr_workq = create_workqueue(dd->workq_name); + if (!dd->isr_workq) { + dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); + goto block_initialize_err; + } + + memset(cpu_list, 0, sizeof(cpu_list)); + + node_mask = cpumask_of_node(dd->numa_node); + if (!cpumask_empty(node_mask)) { + for_each_cpu(cpu, node_mask) + { + snprintf(&cpu_list[j], 256 - j, "%d ", cpu); + j = strlen(cpu_list); + } + + dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n", + dd->numa_node, + topology_physical_package_id(cpumask_first(node_mask)), + nr_cpus_node(dd->numa_node), + cpu_list); + } else + dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n"); + + dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node); + dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n", + cpu_to_node(dd->isr_binding), dd->isr_binding); + + /* first worker context always runs in ISR */ + dd->work[0].cpu_binding = dd->isr_binding; + dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[3].cpu_binding = dd->work[0].cpu_binding; + dd->work[4].cpu_binding = dd->work[1].cpu_binding; + dd->work[5].cpu_binding = dd->work[2].cpu_binding; + dd->work[6].cpu_binding = dd->work[2].cpu_binding; + dd->work[7].cpu_binding = dd->work[1].cpu_binding; + + /* Log the bindings */ + for_each_present_cpu(cpu) { + memset(cpu_list, 0, sizeof(cpu_list)); + for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) { + if (dd->work[i].cpu_binding == cpu) { + snprintf(&cpu_list[j], 256 - j, "%d ", i); + j = strlen(cpu_list); + } + } + if (j) + dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list); + } + + INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0); + INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1); + INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2); + INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3); + INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4); + INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5); + INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6); + INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7); + + pci_set_master(pdev); if (pci_enable_msi(pdev)) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); goto block_initialize_err; } - /* Copy the info we may need later into the private data structure. */ - dd->major = mtip_major; - dd->instance = instance; - dd->pdev = pdev; - /* Initialize the block layer. */ rv = mtip_block_initialize(dd); if (rv < 0) { @@ -4048,7 +4307,13 @@ static int mtip_pci_probe(struct pci_dev *pdev, block_initialize_err: pci_disable_msi(pdev); - + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } setmask_err: pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); @@ -4089,6 +4354,14 @@ static void mtip_pci_remove(struct pci_dev *pdev) /* Clean up the block layer. */ mtip_block_remove(dd); + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } + pci_disable_msi(pdev); kfree(dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b174264..3bffff5 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -164,6 +164,35 @@ struct smart_attr { u8 res[3]; } __packed; +struct mtip_work { + struct work_struct work; + void *port; + int cpu_binding; + u32 completed; +} ____cacheline_aligned_in_smp; + +#define DEFINE_HANDLER(group) \ + void mtip_workq_sdbf##group(struct work_struct *work) \ + { \ + struct mtip_work *w = (struct mtip_work *) work; \ + mtip_workq_sdbfx(w->port, group, w->completed); \ + } + +#define MTIP_TRIM_TIMEOUT_MS 240000 +#define MTIP_MAX_TRIM_ENTRIES 8 +#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 + +struct mtip_trim_entry { + u32 lba; /* starting lba of region */ + u16 rsvd; /* unused */ + u16 range; /* # of 512b blocks to trim */ +} __packed; + +struct mtip_trim { + /* Array of regions to trim */ + struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES]; +} __packed; + /* Register Frame Information Structure (FIS), host to device. */ struct host_to_dev_fis { /* @@ -424,7 +453,7 @@ struct mtip_port { */ struct semaphore cmd_slot; /* Spinlock for working around command-issue bug. */ - spinlock_t cmd_issue_lock; + spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS]; }; /* @@ -447,9 +476,6 @@ struct driver_data { struct mtip_port *port; /* Pointer to the port data structure. */ - /* Tasklet used to process the bottom half of the ISR. */ - struct tasklet_struct tasklet; - unsigned product_type; /* magic value declaring the product type */ unsigned slot_groups; /* number of slot groups the product supports */ @@ -461,6 +487,20 @@ struct driver_data { struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ struct dentry *dfs_node; + + bool trim_supp; /* flag indicating trim support */ + + int numa_node; /* NUMA support */ + + char workq_name[32]; + + struct workqueue_struct *isr_workq; + + struct mtip_work work[MTIP_MAX_SLOT_GROUPS]; + + atomic_t irq_workers_active; + + int isr_binding; }; #endif diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ade146b..7fecc78 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -98,6 +98,7 @@ static const char *nbdcmd_to_ascii(int cmd) case NBD_CMD_READ: return "read"; case NBD_CMD_WRITE: return "write"; case NBD_CMD_DISC: return "disconnect"; + case NBD_CMD_FLUSH: return "flush"; case NBD_CMD_TRIM: return "trim/discard"; } return "invalid"; @@ -244,8 +245,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(nbd_cmd(req)); - request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); - request.len = htonl(size); + + if (nbd_cmd(req) == NBD_CMD_FLUSH) { + /* Other values are reserved for FLUSH requests. */ + request.from = 0; + request.len = 0; + } else { + request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); + request.len = htonl(size); + } memcpy(request.handle, &req, sizeof(req)); dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", @@ -482,6 +490,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) } } + if (req->cmd_flags & REQ_FLUSH) { + BUG_ON(unlikely(blk_rq_sectors(req))); + nbd_cmd(req) = NBD_CMD_FLUSH; + } + req->errors = 0; mutex_lock(&nbd->tx_lock); @@ -551,6 +564,7 @@ static int nbd_thread(void *data) */ static void do_nbd_request(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) { struct request *req; @@ -595,12 +609,20 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, struct request sreq; dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); + if (!nbd->sock) + return -EINVAL; + mutex_unlock(&nbd->tx_lock); + fsync_bdev(bdev); + mutex_lock(&nbd->tx_lock); blk_rq_init(NULL, &sreq); sreq.cmd_type = REQ_TYPE_SPECIAL; nbd_cmd(&sreq) = NBD_CMD_DISC; + + /* Check again after getting mutex back. */ if (!nbd->sock) return -EINVAL; + nbd_send_req(nbd, &sreq); return 0; } @@ -614,6 +636,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_clear_que(nbd); BUG_ON(!list_empty(&nbd->queue_head)); BUG_ON(!list_empty(&nbd->waiting_queue)); + kill_bdev(bdev); if (file) fput(file); return 0; @@ -681,9 +704,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, mutex_unlock(&nbd->tx_lock); + if (nbd->flags & NBD_FLAG_READ_ONLY) + set_device_ro(bdev, true); if (nbd->flags & NBD_FLAG_SEND_TRIM) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + if (nbd->flags & NBD_FLAG_SEND_FLUSH) + blk_queue_flush(nbd->disk->queue, REQ_FLUSH); + else + blk_queue_flush(nbd->disk->queue, 0); thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); if (IS_ERR(thread)) { @@ -702,9 +731,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd->file = NULL; nbd_clear_que(nbd); dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); + kill_bdev(bdev); queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + set_device_ro(bdev, false); if (file) fput(file); + nbd->flags = 0; nbd->bytesize = 0; bdev->bd_inode->i_size = 0; set_capacity(nbd->disk, 0); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 89576a0..6c81a4c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -52,9 +52,12 @@ #define SECTOR_SHIFT 9 #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) -/* It might be useful to have this defined elsewhere too */ +/* It might be useful to have these defined elsewhere */ -#define U64_MAX ((u64) (~0ULL)) +#define U8_MAX ((u8) (~0U)) +#define U16_MAX ((u16) (~0U)) +#define U32_MAX ((u32) (~0U)) +#define U64_MAX ((u64) (~0ULL)) #define RBD_DRV_NAME "rbd" #define RBD_DRV_NAME_LONG "rbd (rados block device)" @@ -66,7 +69,6 @@ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ -#define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" @@ -93,8 +95,6 @@ #define DEV_NAME_LEN 32 #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) -#define RBD_READ_ONLY_DEFAULT false - /* * block device image metadata (in-memory version) */ @@ -119,16 +119,33 @@ struct rbd_image_header { * An rbd image specification. * * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely - * identify an image. + * identify an image. Each rbd_dev structure includes a pointer to + * an rbd_spec structure that encapsulates this identity. + * + * Each of the id's in an rbd_spec has an associated name. For a + * user-mapped image, the names are supplied and the id's associated + * with them are looked up. For a layered image, a parent image is + * defined by the tuple, and the names are looked up. + * + * An rbd_dev structure contains a parent_spec pointer which is + * non-null if the image it represents is a child in a layered + * image. This pointer will refer to the rbd_spec structure used + * by the parent rbd_dev for its own identity (i.e., the structure + * is shared between the parent and child). + * + * Since these structures are populated once, during the discovery + * phase of image construction, they are effectively immutable so + * we make no effort to synchronize access to them. + * + * Note that code herein does not assume the image name is known (it + * could be a null pointer). */ struct rbd_spec { u64 pool_id; char *pool_name; char *image_id; - size_t image_id_len; char *image_name; - size_t image_name_len; u64 snap_id; char *snap_name; @@ -136,10 +153,6 @@ struct rbd_spec { struct kref kref; }; -struct rbd_options { - bool read_only; -}; - /* * an instance of the client. multiple devices may share an rbd client. */ @@ -149,37 +162,76 @@ struct rbd_client { struct list_head node; }; -/* - * a request completion status - */ -struct rbd_req_status { - int done; - int rc; - u64 bytes; +struct rbd_img_request; +typedef void (*rbd_img_callback_t)(struct rbd_img_request *); + +#define BAD_WHICH U32_MAX /* Good which or bad which, which? */ + +struct rbd_obj_request; +typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); + +enum obj_request_type { + OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES }; -/* - * a collection of requests - */ -struct rbd_req_coll { - int total; - int num_done; +struct rbd_obj_request { + const char *object_name; + u64 offset; /* object start byte */ + u64 length; /* bytes from offset */ + + struct rbd_img_request *img_request; + struct list_head links; /* img_request->obj_requests */ + u32 which; /* posn image request list */ + + enum obj_request_type type; + union { + struct bio *bio_list; + struct { + struct page **pages; + u32 page_count; + }; + }; + + struct ceph_osd_request *osd_req; + + u64 xferred; /* bytes transferred */ + u64 version; + int result; + atomic_t done; + + rbd_obj_callback_t callback; + struct completion completion; + struct kref kref; - struct rbd_req_status status[0]; }; -/* - * a single io request - */ -struct rbd_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct page **pages; /* list of used pages */ - u64 len; - int coll_index; - struct rbd_req_coll *coll; +struct rbd_img_request { + struct request *rq; + struct rbd_device *rbd_dev; + u64 offset; /* starting image byte offset */ + u64 length; /* byte count from offset */ + bool write_request; /* false for read */ + union { + struct ceph_snap_context *snapc; /* for writes */ + u64 snap_id; /* for reads */ + }; + spinlock_t completion_lock;/* protects next_completion */ + u32 next_completion; + rbd_img_callback_t callback; + + u32 obj_request_count; + struct list_head obj_requests; /* rbd_obj_request structs */ + + struct kref kref; }; +#define for_each_obj_request(ireq, oreq) \ + list_for_each_entry(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_from(ireq, oreq) \ + list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_safe(ireq, oreq, n) \ + list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) + struct rbd_snap { struct device dev; const char *name; @@ -209,16 +261,18 @@ struct rbd_device { char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ - spinlock_t lock; /* queue lock */ + spinlock_t lock; /* queue, flags, open_count */ struct rbd_image_header header; - bool exists; + unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; char *header_name; + struct ceph_file_layout layout; + struct ceph_osd_event *watch_event; - struct ceph_osd_request *watch_request; + struct rbd_obj_request *watch_request; struct rbd_spec *parent_spec; u64 parent_overlap; @@ -235,7 +289,19 @@ struct rbd_device { /* sysfs related */ struct device dev; - unsigned long open_count; + unsigned long open_count; /* protected by lock */ +}; + +/* + * Flag bits for rbd_dev->flags. If atomicity is required, + * rbd_dev->lock is used to protect access. + * + * Currently, only the "removing" flag (which is coupled with the + * "open_count" field) requires atomic access. + */ +enum rbd_dev_flags { + RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ + RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ }; static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ @@ -277,6 +343,33 @@ static struct device rbd_root_dev = { .release = rbd_root_dev_release, }; +static __printf(2, 3) +void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (!rbd_dev) + printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); + else if (rbd_dev->disk) + printk(KERN_WARNING "%s: %s: %pV\n", + RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_name) + printk(KERN_WARNING "%s: image %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_id) + printk(KERN_WARNING "%s: id %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); + else /* punt */ + printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", + RBD_DRV_NAME, rbd_dev, &vaf); + va_end(args); +} + #ifdef RBD_DEBUG #define rbd_assert(expr) \ if (unlikely(!(expr))) { \ @@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { struct rbd_device *rbd_dev = bdev->bd_disk->private_data; + bool removing = false; if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) return -EROFS; + spin_lock_irq(&rbd_dev->lock); + if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) + removing = true; + else + rbd_dev->open_count++; + spin_unlock_irq(&rbd_dev->lock); + if (removing) + return -ENOENT; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); - rbd_dev->open_count++; mutex_unlock(&ctl_mutex); return 0; @@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) static int rbd_release(struct gendisk *disk, fmode_t mode) { struct rbd_device *rbd_dev = disk->private_data; + unsigned long open_count_before; + + spin_lock_irq(&rbd_dev->lock); + open_count_before = rbd_dev->open_count--; + spin_unlock_irq(&rbd_dev->lock); + rbd_assert(open_count_before > 0); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rbd_assert(rbd_dev->open_count > 0); - rbd_dev->open_count--; put_device(&rbd_dev->dev); mutex_unlock(&ctl_mutex); @@ -337,7 +443,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) struct rbd_client *rbdc; int ret = -ENOMEM; - dout("rbd_client_create\n"); + dout("%s:\n", __func__); rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); if (!rbdc) goto out_opt; @@ -361,8 +467,8 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) spin_unlock(&rbd_client_list_lock); mutex_unlock(&ctl_mutex); + dout("%s: rbdc %p\n", __func__, rbdc); - dout("rbd_client_create created %p\n", rbdc); return rbdc; out_err: @@ -373,6 +479,8 @@ out_mutex: out_opt: if (ceph_opts) ceph_destroy_options(ceph_opts); + dout("%s: error %d\n", __func__, ret); + return ERR_PTR(ret); } @@ -426,6 +534,12 @@ static match_table_t rbd_opts_tokens = { {-1, NULL} }; +struct rbd_options { + bool read_only; +}; + +#define RBD_READ_ONLY_DEFAULT false + static int parse_rbd_opts_token(char *c, void *private) { struct rbd_options *rbd_opts = private; @@ -493,7 +607,7 @@ static void rbd_client_release(struct kref *kref) { struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); - dout("rbd_release_client %p\n", rbdc); + dout("%s: rbdc %p\n", __func__, rbdc); spin_lock(&rbd_client_list_lock); list_del(&rbdc->node); spin_unlock(&rbd_client_list_lock); @@ -512,18 +626,6 @@ static void rbd_put_client(struct rbd_client *rbdc) kref_put(&rbdc->kref, rbd_client_release); } -/* - * Destroy requests collection - */ -static void rbd_coll_release(struct kref *kref) -{ - struct rbd_req_coll *coll = - container_of(kref, struct rbd_req_coll, kref); - - dout("rbd_coll_release %p\n", coll); - kfree(coll); -} - static bool rbd_image_format_valid(u32 image_format) { return image_format == 1 || image_format == 2; @@ -707,7 +809,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) goto done; rbd_dev->mapping.read_only = true; } - rbd_dev->exists = true; + set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); + done: return ret; } @@ -724,7 +827,7 @@ static void rbd_header_free(struct rbd_image_header *header) header->snapc = NULL; } -static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) +static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) { char *name; u64 segment; @@ -767,23 +870,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev, return length; } -static int rbd_get_num_segments(struct rbd_image_header *header, - u64 ofs, u64 len) -{ - u64 start_seg; - u64 end_seg; - - if (!len) - return 0; - if (len - 1 > U64_MAX - ofs) - return -ERANGE; - - start_seg = ofs >> header->obj_order; - end_seg = (ofs + len - 1) >> header->obj_order; - - return end_seg - start_seg + 1; -} - /* * returns the size of an object in the image */ @@ -949,8 +1035,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src, unsigned int bi_size; struct bio *bio; - if (!bi) + if (!bi) { + rbd_warn(NULL, "bio_chain exhausted with %u left", len); goto out_err; /* EINVAL; ran out of bio's */ + } bi_size = min_t(unsigned int, bi->bi_size - off, len); bio = bio_clone_range(bi, off, bi_size, gfpmask); if (!bio) @@ -976,399 +1064,721 @@ out_err: return NULL; } -/* - * helpers for osd request op vectors. - */ -static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, - int opcode, u32 payload_len) +static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { - struct ceph_osd_req_op *ops; + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_get(&obj_request->kref); +} + +static void rbd_obj_request_destroy(struct kref *kref); +static void rbd_obj_request_put(struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request != NULL); + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_put(&obj_request->kref, rbd_obj_request_destroy); +} + +static void rbd_img_request_get(struct rbd_img_request *img_request) +{ + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_get(&img_request->kref); +} + +static void rbd_img_request_destroy(struct kref *kref); +static void rbd_img_request_put(struct rbd_img_request *img_request) +{ + rbd_assert(img_request != NULL); + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_put(&img_request->kref, rbd_img_request_destroy); +} + +static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->img_request == NULL); + + rbd_obj_request_get(obj_request); + obj_request->img_request = img_request; + obj_request->which = img_request->obj_request_count; + rbd_assert(obj_request->which != BAD_WHICH); + img_request->obj_request_count++; + list_add_tail(&obj_request->links, &img_request->obj_requests); + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); +} - ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); - if (!ops) +static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->which != BAD_WHICH); + + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); + list_del(&obj_request->links); + rbd_assert(img_request->obj_request_count > 0); + img_request->obj_request_count--; + rbd_assert(obj_request->which == img_request->obj_request_count); + obj_request->which = BAD_WHICH; + rbd_assert(obj_request->img_request == img_request); + obj_request->img_request = NULL; + obj_request->callback = NULL; + rbd_obj_request_put(obj_request); +} + +static bool obj_request_type_valid(enum obj_request_type type) +{ + switch (type) { + case OBJ_REQUEST_NODATA: + case OBJ_REQUEST_BIO: + case OBJ_REQUEST_PAGES: + return true; + default: + return false; + } +} + +static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...) +{ + struct ceph_osd_req_op *op; + va_list args; + size_t size; + + op = kzalloc(sizeof (*op), GFP_NOIO); + if (!op) return NULL; + op->op = opcode; + va_start(args, opcode); + switch (opcode) { + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_WRITE: + /* rbd_osd_req_op_create(READ, offset, length) */ + /* rbd_osd_req_op_create(WRITE, offset, length) */ + op->extent.offset = va_arg(args, u64); + op->extent.length = va_arg(args, u64); + if (opcode == CEPH_OSD_OP_WRITE) + op->payload_len = op->extent.length; + break; + case CEPH_OSD_OP_STAT: + break; + case CEPH_OSD_OP_CALL: + /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ + op->cls.class_name = va_arg(args, char *); + size = strlen(op->cls.class_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.class_len = size; + op->payload_len = size; + + op->cls.method_name = va_arg(args, char *); + size = strlen(op->cls.method_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.method_len = size; + op->payload_len += size; + + op->cls.argc = 0; + op->cls.indata = va_arg(args, void *); + size = va_arg(args, size_t); + rbd_assert(size <= (size_t) U32_MAX); + op->cls.indata_len = (u32) size; + op->payload_len += size; + break; + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ + /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ + op->watch.cookie = va_arg(args, u64); + op->watch.ver = va_arg(args, u64); + op->watch.ver = cpu_to_le64(op->watch.ver); + if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) + op->watch.flag = (u8) 1; + break; + default: + rbd_warn(NULL, "unsupported opcode %hu\n", opcode); + kfree(op); + op = NULL; + break; + } + va_end(args); - ops[0].op = opcode; + return op; +} - /* - * op extent offset and length will be set later on - * in calc_raw_layout() - */ - ops[0].payload_len = payload_len; +static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) +{ + kfree(op); +} + +static int rbd_obj_request_submit(struct ceph_osd_client *osdc, + struct rbd_obj_request *obj_request) +{ + dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request); - return ops; + return ceph_osdc_start_request(osdc, obj_request->osd_req, false); } -static void rbd_destroy_ops(struct ceph_osd_req_op *ops) +static void rbd_img_request_complete(struct rbd_img_request *img_request) { - kfree(ops); + dout("%s: img %p\n", __func__, img_request); + if (img_request->callback) + img_request->callback(img_request); + else + rbd_img_request_put(img_request); } -static void rbd_coll_end_req_index(struct request *rq, - struct rbd_req_coll *coll, - int index, - int ret, u64 len) +/* Caller is responsible for rbd_obj_request_destroy(obj_request) */ + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) { - struct request_queue *q; - int min, max, i; + dout("%s: obj %p\n", __func__, obj_request); - dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", - coll, index, ret, (unsigned long long) len); + return wait_for_completion_interruptible(&obj_request->completion); +} - if (!rq) - return; +static void obj_request_done_init(struct rbd_obj_request *obj_request) +{ + atomic_set(&obj_request->done, 0); + smp_wmb(); +} - if (!coll) { - blk_end_request(rq, ret, len); - return; +static void obj_request_done_set(struct rbd_obj_request *obj_request) +{ + int done; + + done = atomic_inc_return(&obj_request->done); + if (done > 1) { + struct rbd_img_request *img_request = obj_request->img_request; + struct rbd_device *rbd_dev; + + rbd_dev = img_request ? img_request->rbd_dev : NULL; + rbd_warn(rbd_dev, "obj_request %p was already done\n", + obj_request); } +} - q = rq->q; - - spin_lock_irq(q->queue_lock); - coll->status[index].done = 1; - coll->status[index].rc = ret; - coll->status[index].bytes = len; - max = min = coll->num_done; - while (max < coll->total && coll->status[max].done) - max++; - - for (i = min; i<max; i++) { - __blk_end_request(rq, coll->status[i].rc, - coll->status[i].bytes); - coll->num_done++; - kref_put(&coll->kref, rbd_coll_release); +static bool obj_request_done_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return atomic_read(&obj_request->done) != 0; +} + +static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p cb %p\n", __func__, obj_request, + obj_request->callback); + if (obj_request->callback) + obj_request->callback(obj_request); + else + complete_all(&obj_request->completion); +} + +static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} + +static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, + obj_request->result, obj_request->xferred, obj_request->length); + /* + * ENOENT means a hole in the object. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied. + */ + if (obj_request->result == -ENOENT) { + zero_bio_chain(obj_request->bio_list, 0); + obj_request->result = 0; + obj_request->xferred = obj_request->length; + } else if (obj_request->xferred < obj_request->length && + !obj_request->result) { + zero_bio_chain(obj_request->bio_list, obj_request->xferred); + obj_request->xferred = obj_request->length; } - spin_unlock_irq(q->queue_lock); + obj_request_done_set(obj_request); } -static void rbd_coll_end_req(struct rbd_request *req, - int ret, u64 len) +static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) { - rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); + dout("%s: obj %p result %d %llu\n", __func__, obj_request, + obj_request->result, obj_request->length); + /* + * There is no such thing as a successful short write. + * Our xferred value is the number of bytes transferred + * back. Set it to our originally-requested length. + */ + obj_request->xferred = obj_request->length; + obj_request_done_set(obj_request); } /* - * Send ceph osd request + * For a simple stat call there's nothing to do. We'll do more if + * this is part of a write sequence for a layered image. */ -static int rbd_do_request(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - const char *object_name, u64 ofs, u64 len, - struct bio *bio, - struct page **pages, - int num_pages, - int flags, - struct ceph_osd_req_op *ops, - struct rbd_req_coll *coll, - int coll_index, - void (*rbd_cb)(struct ceph_osd_request *req, - struct ceph_msg *msg), - struct ceph_osd_request **linger_req, - u64 *ver) -{ - struct ceph_osd_request *req; - struct ceph_file_layout *layout; - int ret; - u64 bno; - struct timespec mtime = CURRENT_TIME; - struct rbd_request *req_data; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_client *osdc; +static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} - req_data = kzalloc(sizeof(*req_data), GFP_NOIO); - if (!req_data) { - if (coll) - rbd_coll_end_req_index(rq, coll, coll_index, - -ENOMEM, len); - return -ENOMEM; +static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, + struct ceph_msg *msg) +{ + struct rbd_obj_request *obj_request = osd_req->r_priv; + u16 opcode; + + dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg); + rbd_assert(osd_req == obj_request->osd_req); + rbd_assert(!!obj_request->img_request ^ + (obj_request->which == BAD_WHICH)); + + if (osd_req->r_result < 0) + obj_request->result = osd_req->r_result; + obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); + + WARN_ON(osd_req->r_num_ops != 1); /* For now */ + + /* + * We support a 64-bit length, but ultimately it has to be + * passed to blk_end_request(), which takes an unsigned int. + */ + obj_request->xferred = osd_req->r_reply_op_len[0]; + rbd_assert(obj_request->xferred < (u64) UINT_MAX); + opcode = osd_req->r_request_ops[0].op; + switch (opcode) { + case CEPH_OSD_OP_READ: + rbd_osd_read_callback(obj_request); + break; + case CEPH_OSD_OP_WRITE: + rbd_osd_write_callback(obj_request); + break; + case CEPH_OSD_OP_STAT: + rbd_osd_stat_callback(obj_request); + break; + case CEPH_OSD_OP_CALL: + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + rbd_osd_trivial_callback(obj_request); + break; + default: + rbd_warn(NULL, "%s: unsupported op %hu\n", + obj_request->object_name, (unsigned short) opcode); + break; } - if (coll) { - req_data->coll = coll; - req_data->coll_index = coll_index; + if (obj_request_done_test(obj_request)) + rbd_obj_request_complete(obj_request); +} + +static struct ceph_osd_request *rbd_osd_req_create( + struct rbd_device *rbd_dev, + bool write_request, + struct rbd_obj_request *obj_request, + struct ceph_osd_req_op *op) +{ + struct rbd_img_request *img_request = obj_request->img_request; + struct ceph_snap_context *snapc = NULL; + struct ceph_osd_client *osdc; + struct ceph_osd_request *osd_req; + struct timespec now; + struct timespec *mtime; + u64 snap_id = CEPH_NOSNAP; + u64 offset = obj_request->offset; + u64 length = obj_request->length; + + if (img_request) { + rbd_assert(img_request->write_request == write_request); + if (img_request->write_request) + snapc = img_request->snapc; + else + snap_id = img_request->snap_id; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", - object_name, (unsigned long long) ofs, - (unsigned long long) len, coll, coll_index); + /* Allocate and initialize the request, for the single op */ osdc = &rbd_dev->rbd_client->client->osdc; - req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, - false, GFP_NOIO, pages, bio); - if (!req) { - ret = -ENOMEM; - goto done_pages; + osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC); + if (!osd_req) + return NULL; /* ENOMEM */ + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + rbd_assert(obj_request->bio_list != NULL); + osd_req->r_bio = obj_request->bio_list; + break; + case OBJ_REQUEST_PAGES: + osd_req->r_pages = obj_request->pages; + osd_req->r_num_pages = obj_request->page_count; + osd_req->r_page_alignment = offset & ~PAGE_MASK; + break; } - req->r_callback = rbd_cb; + if (write_request) { + osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + now = CURRENT_TIME; + mtime = &now; + } else { + osd_req->r_flags = CEPH_OSD_FLAG_READ; + mtime = NULL; /* not needed for reads */ + offset = 0; /* These are not used... */ + length = 0; /* ...for osd read requests */ + } - req_data->rq = rq; - req_data->bio = bio; - req_data->pages = pages; - req_data->len = len; + osd_req->r_callback = rbd_osd_req_callback; + osd_req->r_priv = obj_request; - req->r_priv = req_data; + osd_req->r_oid_len = strlen(obj_request->object_name); + rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); + memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len); - reqhead = req->r_request->front.iov_base; - reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); + osd_req->r_file_layout = rbd_dev->layout; /* struct */ - strncpy(req->r_oid, object_name, sizeof(req->r_oid)); - req->r_oid_len = strlen(req->r_oid); + /* osd_req will get its own reference to snapc (if non-null) */ - layout = &req->r_file_layout; - memset(layout, 0, sizeof(*layout)); - layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_stripe_count = cpu_to_le32(1); - layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); - ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, - req, ops); - rbd_assert(ret == 0); + ceph_osdc_build_request(osd_req, offset, length, 1, op, + snapc, snap_id, mtime); - ceph_osdc_build_request(req, ofs, &len, - ops, - snapc, - &mtime, - req->r_oid, req->r_oid_len); + return osd_req; +} - if (linger_req) { - ceph_osdc_set_request_linger(osdc, req); - *linger_req = req; - } +static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) +{ + ceph_osdc_put_request(osd_req); +} - ret = ceph_osdc_start_request(osdc, req, false); - if (ret < 0) - goto done_err; - - if (!rbd_cb) { - ret = ceph_osdc_wait_request(osdc, req); - if (ver) - *ver = le64_to_cpu(req->r_reassert_version.version); - dout("reassert_ver=%llu\n", - (unsigned long long) - le64_to_cpu(req->r_reassert_version.version)); - ceph_osdc_put_request(req); +/* object_name is assumed to be a non-null pointer and NUL-terminated */ + +static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, + u64 offset, u64 length, + enum obj_request_type type) +{ + struct rbd_obj_request *obj_request; + size_t size; + char *name; + + rbd_assert(obj_request_type_valid(type)); + + size = strlen(object_name) + 1; + obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL); + if (!obj_request) + return NULL; + + name = (char *)(obj_request + 1); + obj_request->object_name = memcpy(name, object_name, size); + obj_request->offset = offset; + obj_request->length = length; + obj_request->which = BAD_WHICH; + obj_request->type = type; + INIT_LIST_HEAD(&obj_request->links); + obj_request_done_init(obj_request); + init_completion(&obj_request->completion); + kref_init(&obj_request->kref); + + dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name, + offset, length, (int)type, obj_request); + + return obj_request; +} + +static void rbd_obj_request_destroy(struct kref *kref) +{ + struct rbd_obj_request *obj_request; + + obj_request = container_of(kref, struct rbd_obj_request, kref); + + dout("%s: obj %p\n", __func__, obj_request); + + rbd_assert(obj_request->img_request == NULL); + rbd_assert(obj_request->which == BAD_WHICH); + + if (obj_request->osd_req) + rbd_osd_req_destroy(obj_request->osd_req); + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + if (obj_request->bio_list) + bio_chain_put(obj_request->bio_list); + break; + case OBJ_REQUEST_PAGES: + if (obj_request->pages) + ceph_release_page_vector(obj_request->pages, + obj_request->page_count); + break; } - return ret; -done_err: - bio_chain_put(req_data->bio); - ceph_osdc_put_request(req); -done_pages: - rbd_coll_end_req(req_data, ret, len); - kfree(req_data); - return ret; + kfree(obj_request); } /* - * Ceph osd op callback + * Caller is responsible for filling in the list of object requests + * that comprises the image request, and the Linux request pointer + * (if there is one). */ -static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) -{ - struct rbd_request *req_data = req->r_priv; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; - __s32 rc; - u64 bytes; - int read_op; - - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); - - dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", - (unsigned long long) bytes, read_op, (int) rc); - - if (rc == -ENOENT && read_op) { - zero_bio_chain(req_data->bio, 0); - rc = 0; - } else if (rc == 0 && read_op && bytes < req_data->len) { - zero_bio_chain(req_data->bio, bytes); - bytes = req_data->len; - } +static struct rbd_img_request *rbd_img_request_create( + struct rbd_device *rbd_dev, + u64 offset, u64 length, + bool write_request) +{ + struct rbd_img_request *img_request; + struct ceph_snap_context *snapc = NULL; - rbd_coll_end_req(req_data, rc, bytes); + img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC); + if (!img_request) + return NULL; - if (req_data->bio) - bio_chain_put(req_data->bio); + if (write_request) { + down_read(&rbd_dev->header_rwsem); + snapc = ceph_get_snap_context(rbd_dev->header.snapc); + up_read(&rbd_dev->header_rwsem); + if (WARN_ON(!snapc)) { + kfree(img_request); + return NULL; /* Shouldn't happen */ + } + } - ceph_osdc_put_request(req); - kfree(req_data); + img_request->rq = NULL; + img_request->rbd_dev = rbd_dev; + img_request->offset = offset; + img_request->length = length; + img_request->write_request = write_request; + if (write_request) + img_request->snapc = snapc; + else + img_request->snap_id = rbd_dev->spec->snap_id; + spin_lock_init(&img_request->completion_lock); + img_request->next_completion = 0; + img_request->callback = NULL; + img_request->obj_request_count = 0; + INIT_LIST_HEAD(&img_request->obj_requests); + kref_init(&img_request->kref); + + rbd_img_request_get(img_request); /* Avoid a warning */ + rbd_img_request_put(img_request); /* TEMPORARY */ + + dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, + write_request ? "write" : "read", offset, length, + img_request); + + return img_request; } -static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) +static void rbd_img_request_destroy(struct kref *kref) { - ceph_osdc_put_request(req); + struct rbd_img_request *img_request; + struct rbd_obj_request *obj_request; + struct rbd_obj_request *next_obj_request; + + img_request = container_of(kref, struct rbd_img_request, kref); + + dout("%s: img %p\n", __func__, img_request); + + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_img_obj_request_del(img_request, obj_request); + rbd_assert(img_request->obj_request_count == 0); + + if (img_request->write_request) + ceph_put_snap_context(img_request->snapc); + + kfree(img_request); } -/* - * Do a synchronous ceph osd operation - */ -static int rbd_req_sync_op(struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - int flags, - struct ceph_osd_req_op *ops, - const char *object_name, - u64 ofs, u64 inbound_size, - char *inbound, - struct ceph_osd_request **linger_req, - u64 *ver) +static int rbd_img_request_fill_bio(struct rbd_img_request *img_request, + struct bio *bio_list) { - int ret; - struct page **pages; - int num_pages; - - rbd_assert(ops != NULL); + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct rbd_obj_request *obj_request = NULL; + struct rbd_obj_request *next_obj_request; + unsigned int bio_offset; + u64 image_offset; + u64 resid; + u16 opcode; + + dout("%s: img %p bio %p\n", __func__, img_request, bio_list); + + opcode = img_request->write_request ? CEPH_OSD_OP_WRITE + : CEPH_OSD_OP_READ; + bio_offset = 0; + image_offset = img_request->offset; + rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT); + resid = img_request->length; + rbd_assert(resid > 0); + while (resid) { + const char *object_name; + unsigned int clone_size; + struct ceph_osd_req_op *op; + u64 offset; + u64 length; + + object_name = rbd_segment_name(rbd_dev, image_offset); + if (!object_name) + goto out_unwind; + offset = rbd_segment_offset(rbd_dev, image_offset); + length = rbd_segment_length(rbd_dev, image_offset, resid); + obj_request = rbd_obj_request_create(object_name, + offset, length, + OBJ_REQUEST_BIO); + kfree(object_name); /* object request has its own copy */ + if (!obj_request) + goto out_unwind; + + rbd_assert(length <= (u64) UINT_MAX); + clone_size = (unsigned int) length; + obj_request->bio_list = bio_chain_clone_range(&bio_list, + &bio_offset, clone_size, + GFP_ATOMIC); + if (!obj_request->bio_list) + goto out_partial; - num_pages = calc_pages_for(ofs, inbound_size); - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + /* + * Build up the op to use in building the osd + * request. Note that the contents of the op are + * copied by rbd_osd_req_create(). + */ + op = rbd_osd_req_op_create(opcode, offset, length); + if (!op) + goto out_partial; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, + img_request->write_request, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_partial; + /* status and version are initially zero-filled */ + + rbd_img_obj_request_add(img_request, obj_request); + + image_offset += length; + resid -= length; + } - ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, - object_name, ofs, inbound_size, NULL, - pages, num_pages, - flags, - ops, - NULL, 0, - NULL, - linger_req, ver); - if (ret < 0) - goto done; + return 0; - if ((flags & CEPH_OSD_FLAG_READ) && inbound) - ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret); +out_partial: + rbd_obj_request_put(obj_request); +out_unwind: + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_obj_request_put(obj_request); -done: - ceph_release_page_vector(pages, num_pages); - return ret; + return -ENOMEM; } -/* - * Do an asynchronous ceph osd operation - */ -static int rbd_do_op(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - char *seg_name; - u64 seg_ofs; - u64 seg_len; - int ret; - struct ceph_osd_req_op *ops; - u32 payload_len; - int opcode; - int flags; - u64 snapid; - - seg_name = rbd_segment_name(rbd_dev, ofs); - if (!seg_name) - return -ENOMEM; - seg_len = rbd_segment_length(rbd_dev, ofs, len); - seg_ofs = rbd_segment_offset(rbd_dev, ofs); - - if (rq_data_dir(rq) == WRITE) { - opcode = CEPH_OSD_OP_WRITE; - flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; - snapid = CEPH_NOSNAP; - payload_len = seg_len; - } else { - opcode = CEPH_OSD_OP_READ; - flags = CEPH_OSD_FLAG_READ; - snapc = NULL; - snapid = rbd_dev->spec->snap_id; - payload_len = 0; +static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + u32 which = obj_request->which; + bool more = true; + + img_request = obj_request->img_request; + + dout("%s: img %p obj %p\n", __func__, img_request, obj_request); + rbd_assert(img_request != NULL); + rbd_assert(img_request->rq != NULL); + rbd_assert(img_request->obj_request_count > 0); + rbd_assert(which != BAD_WHICH); + rbd_assert(which < img_request->obj_request_count); + rbd_assert(which >= img_request->next_completion); + + spin_lock_irq(&img_request->completion_lock); + if (which != img_request->next_completion) + goto out; + + for_each_obj_request_from(img_request, obj_request) { + unsigned int xferred; + int result; + + rbd_assert(more); + rbd_assert(which < img_request->obj_request_count); + + if (!obj_request_done_test(obj_request)) + break; + + rbd_assert(obj_request->xferred <= (u64) UINT_MAX); + xferred = (unsigned int) obj_request->xferred; + result = (int) obj_request->result; + if (result) + rbd_warn(NULL, "obj_request %s result %d xferred %u\n", + img_request->write_request ? "write" : "read", + result, xferred); + + more = blk_end_request(img_request->rq, result, xferred); + which++; } - ret = -ENOMEM; - ops = rbd_create_rw_ops(1, opcode, payload_len); - if (!ops) - goto done; + rbd_assert(more ^ (which == img_request->obj_request_count)); + img_request->next_completion = which; +out: + spin_unlock_irq(&img_request->completion_lock); - /* we've taken care of segment sizes earlier when we - cloned the bios. We should never have a segment - truncated at this point */ - rbd_assert(seg_len == len); - - ret = rbd_do_request(rq, rbd_dev, snapc, snapid, - seg_name, seg_ofs, seg_len, - bio, - NULL, 0, - flags, - ops, - coll, coll_index, - rbd_req_cb, 0, NULL); - - rbd_destroy_ops(ops); -done: - kfree(seg_name); - return ret; + if (!more) + rbd_img_request_complete(img_request); } -/* - * Request sync osd read - */ -static int rbd_req_sync_read(struct rbd_device *rbd_dev, - u64 snapid, - const char *object_name, - u64 ofs, u64 len, - char *buf, - u64 *ver) -{ - struct ceph_osd_req_op *ops; - int ret; +static int rbd_img_request_submit(struct rbd_img_request *img_request) +{ + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); - if (!ops) - return -ENOMEM; + dout("%s: img %p\n", __func__, img_request); + for_each_obj_request(img_request, obj_request) { + int ret; - ret = rbd_req_sync_op(rbd_dev, NULL, - snapid, - CEPH_OSD_FLAG_READ, - ops, object_name, ofs, len, buf, NULL, ver); - rbd_destroy_ops(ops); + obj_request->callback = rbd_img_obj_callback; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + return ret; + /* + * The image request has its own reference to each + * of its object requests, so we can safely drop the + * initial one here. + */ + rbd_obj_request_put(obj_request); + } - return ret; + return 0; } -/* - * Request sync osd watch - */ -static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, - u64 ver, - u64 notify_id) +static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, + u64 ver, u64 notify_id) { - struct ceph_osd_req_op *ops; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; + struct ceph_osd_client *osdc; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); - if (!ops) + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) return -ENOMEM; - ops[0].watch.ver = cpu_to_le64(ver); - ops[0].watch.cookie = notify_id; - ops[0].watch.flag = 0; + ret = -ENOMEM; + op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, - rbd_dev->header_name, 0, 0, NULL, - NULL, 0, - CEPH_OSD_FLAG_READ, - ops, - NULL, 0, - rbd_simple_req_cb, 0, NULL); + osdc = &rbd_dev->rbd_client->client->osdc; + obj_request->callback = rbd_obj_request_put; + ret = rbd_obj_request_submit(osdc, obj_request); +out: + if (ret) + rbd_obj_request_put(obj_request); - rbd_destroy_ops(ops); return ret; } @@ -1381,95 +1791,103 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) if (!rbd_dev) return; - dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", + dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) - pr_warning(RBD_DRV_NAME "%d got notification but failed to " - " update snaps: %d\n", rbd_dev->major, rc); + rbd_warn(rbd_dev, "got notification but failed to " + " update snaps: %d\n", rc); - rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); + rbd_obj_notify_ack(rbd_dev, hver, notify_id); } /* - * Request sync osd watch + * Request sync osd watch/unwatch. The value of "start" determines + * whether a watch request is being initiated or torn down. */ -static int rbd_req_sync_watch(struct rbd_device *rbd_dev) +static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) { - struct ceph_osd_req_op *ops; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; + rbd_assert(start ^ !!rbd_dev->watch_event); + rbd_assert(start ^ !!rbd_dev->watch_request); - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, - (void *)rbd_dev, &rbd_dev->watch_event); - if (ret < 0) - goto fail; + if (start) { + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, + &rbd_dev->watch_event); + if (ret < 0) + return ret; + rbd_assert(rbd_dev->watch_event != NULL); + } - ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 1; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) + goto out_cancel; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH, + rbd_dev->watch_event->cookie, + rbd_dev->header.obj_version, start); + if (!op) + goto out_cancel; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_cancel; + + if (start) + ceph_osdc_set_request_linger(osdc, obj_request->osd_req); + else + ceph_osdc_unregister_linger_request(osdc, + rbd_dev->watch_request->osd_req); + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out_cancel; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out_cancel; + ret = obj_request->result; + if (ret) + goto out_cancel; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, - &rbd_dev->watch_request, NULL); + /* + * A watch request is set to linger, so the underlying osd + * request won't go away until we unregister it. We retain + * a pointer to the object request during that time (in + * rbd_dev->watch_request), so we'll keep a reference to + * it. We'll drop that reference (below) after we've + * unregistered it. + */ + if (start) { + rbd_dev->watch_request = obj_request; - if (ret < 0) - goto fail_event; + return 0; + } - rbd_destroy_ops(ops); - return 0; + /* We have successfully torn down the watch request */ -fail_event: + rbd_obj_request_put(rbd_dev->watch_request); + rbd_dev->watch_request = NULL; +out_cancel: + /* Cancel the event if we're tearing down, or on error */ ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_dev->watch_event = NULL; -fail: - rbd_destroy_ops(ops); - return ret; -} + if (obj_request) + rbd_obj_request_put(obj_request); -/* - * Request sync osd unwatch - */ -static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) -{ - struct ceph_osd_req_op *ops; - int ret; - - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; - - ops[0].watch.ver = 0; - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 0; - - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, NULL, NULL); - - - rbd_destroy_ops(ops); - ceph_osdc_cancel_event(rbd_dev->watch_event); - rbd_dev->watch_event = NULL; return ret; } /* * Synchronous osd object method call */ -static int rbd_req_sync_exec(struct rbd_device *rbd_dev, +static int rbd_obj_method_sync(struct rbd_device *rbd_dev, const char *object_name, const char *class_name, const char *method_name, @@ -1477,169 +1895,154 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, size_t outbound_size, char *inbound, size_t inbound_size, - int flags, - u64 *ver) + u64 *version) { - struct ceph_osd_req_op *ops; - int class_name_len = strlen(class_name); - int method_name_len = strlen(method_name); - int payload_size; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct ceph_osd_req_op *op; + struct page **pages; + u32 page_count; int ret; /* - * Any input parameters required by the method we're calling - * will be sent along with the class and method names as - * part of the message payload. That data and its size are - * supplied via the indata and indata_len fields (named from - * the perspective of the server side) in the OSD request - * operation. + * Method calls are ultimately read operations but they + * don't involve object data (so no offset or length). + * The result should placed into the inbound buffer + * provided. They also supply outbound data--parameters for + * the object method. Currently if this is present it will + * be a snapshot id. */ - payload_size = class_name_len + method_name_len + outbound_size; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); - if (!ops) - return -ENOMEM; + page_count = (u32) calc_pages_for(0, inbound_size); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); - ops[0].cls.class_name = class_name; - ops[0].cls.class_len = (__u8) class_name_len; - ops[0].cls.method_name = method_name; - ops[0].cls.method_len = (__u8) method_name_len; - ops[0].cls.argc = 0; - ops[0].cls.indata = outbound; - ops[0].cls.indata_len = outbound_size; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, 0, 0, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - flags, ops, - object_name, 0, inbound_size, inbound, - NULL, ver); + obj_request->pages = pages; + obj_request->page_count = page_count; - rbd_destroy_ops(ops); + op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name, + method_name, outbound, outbound_size); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - dout("cls_exec returned %d\n", ret); - return ret; -} + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; -static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) -{ - struct rbd_req_coll *coll = - kzalloc(sizeof(struct rbd_req_coll) + - sizeof(struct rbd_req_status) * num_reqs, - GFP_ATOMIC); + ret = obj_request->result; + if (ret < 0) + goto out; + ret = 0; + ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); - if (!coll) - return NULL; - coll->total = num_reqs; - kref_init(&coll->kref); - return coll; + return ret; } -/* - * block device queue callback - */ -static void rbd_rq_fn(struct request_queue *q) +static void rbd_request_fn(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) { struct rbd_device *rbd_dev = q->queuedata; + bool read_only = rbd_dev->mapping.read_only; struct request *rq; + int result; while ((rq = blk_fetch_request(q))) { - struct bio *bio; - bool do_write; - unsigned int size; - u64 ofs; - int num_segs, cur_seg = 0; - struct rbd_req_coll *coll; - struct ceph_snap_context *snapc; - unsigned int bio_offset; - - dout("fetched request\n"); - - /* filter out block requests we don't understand */ - if ((rq->cmd_type != REQ_TYPE_FS)) { - __blk_end_request_all(rq, 0); - continue; - } + bool write_request = rq_data_dir(rq) == WRITE; + struct rbd_img_request *img_request; + u64 offset; + u64 length; + + /* Ignore any non-FS requests that filter through. */ - /* deduce our operation (read, write) */ - do_write = (rq_data_dir(rq) == WRITE); - if (do_write && rbd_dev->mapping.read_only) { - __blk_end_request_all(rq, -EROFS); + if (rq->cmd_type != REQ_TYPE_FS) { + dout("%s: non-fs request type %d\n", __func__, + (int) rq->cmd_type); + __blk_end_request_all(rq, 0); continue; } - spin_unlock_irq(q->queue_lock); + /* Ignore/skip any zero-length requests */ - down_read(&rbd_dev->header_rwsem); + offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT; + length = (u64) blk_rq_bytes(rq); - if (!rbd_dev->exists) { - rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); - up_read(&rbd_dev->header_rwsem); - dout("request for non-existent snapshot"); - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENXIO); + if (!length) { + dout("%s: zero-length request\n", __func__); + __blk_end_request_all(rq, 0); continue; } - snapc = ceph_get_snap_context(rbd_dev->header.snapc); - - up_read(&rbd_dev->header_rwsem); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - bio = rq->bio; + spin_unlock_irq(q->queue_lock); - dout("%s 0x%x bytes at 0x%llx\n", - do_write ? "write" : "read", - size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); + /* Disallow writes to a read-only device */ - num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); - if (num_segs <= 0) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, num_segs); - ceph_put_snap_context(snapc); - continue; + if (write_request) { + result = -EROFS; + if (read_only) + goto end_request; + rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); } - coll = rbd_alloc_coll(num_segs); - if (!coll) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENOMEM); - ceph_put_snap_context(snapc); - continue; - } - - bio_offset = 0; - do { - u64 limit = rbd_segment_length(rbd_dev, ofs, size); - unsigned int chain_size; - struct bio *bio_chain; - - BUG_ON(limit > (u64) UINT_MAX); - chain_size = (unsigned int) limit; - dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - kref_get(&coll->kref); + /* + * Quit early if the mapped snapshot no longer + * exists. It's still possible the snapshot will + * have disappeared by the time our request arrives + * at the osd, but there's no sense in sending it if + * we already know. + */ + if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { + dout("request for non-existent snapshot"); + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); + result = -ENXIO; + goto end_request; + } - /* Pass a cloned bio chain via an osd request */ + result = -EINVAL; + if (WARN_ON(offset && length > U64_MAX - offset + 1)) + goto end_request; /* Shouldn't happen */ - bio_chain = bio_chain_clone_range(&bio, - &bio_offset, chain_size, - GFP_ATOMIC); - if (bio_chain) - (void) rbd_do_op(rq, rbd_dev, snapc, - ofs, chain_size, - bio_chain, coll, cur_seg); - else - rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, chain_size); - size -= chain_size; - ofs += chain_size; + result = -ENOMEM; + img_request = rbd_img_request_create(rbd_dev, offset, length, + write_request); + if (!img_request) + goto end_request; - cur_seg++; - } while (size > 0); - kref_put(&coll->kref, rbd_coll_release); + img_request->rq = rq; + result = rbd_img_request_fill_bio(img_request, rq->bio); + if (!result) + result = rbd_img_request_submit(img_request); + if (result) + rbd_img_request_put(img_request); +end_request: spin_lock_irq(q->queue_lock); - - ceph_put_snap_context(snapc); + if (result < 0) { + rbd_warn(rbd_dev, "obj_request %s result %d\n", + write_request ? "write" : "read", result); + __blk_end_request_all(rq, result); + } } } @@ -1703,6 +2106,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) put_disk(disk); } +static int rbd_obj_read_sync(struct rbd_device *rbd_dev, + const char *object_name, + u64 offset, u64 length, + char *buf, u64 *version) + +{ + struct ceph_osd_req_op *op; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct page **pages = NULL; + u32 page_count; + size_t size; + int ret; + + page_count = (u32) calc_pages_for(offset, length); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + ret = PTR_ERR(pages); + + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, offset, length, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; + + obj_request->pages = pages; + obj_request->page_count = page_count; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; + + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; + + ret = obj_request->result; + if (ret < 0) + goto out; + + rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); + size = (size_t) obj_request->xferred; + ceph_copy_from_page_vector(pages, buf, 0, size); + rbd_assert(size <= (size_t) INT_MAX); + ret = (int) size; + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); + + return ret; +} + /* * Read the complete header for the given rbd device. * @@ -1741,24 +2209,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) if (!ondisk) return ERR_PTR(-ENOMEM); - ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, - rbd_dev->header_name, + ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, 0, size, (char *) ondisk, version); - if (ret < 0) goto out_err; if (WARN_ON((size_t) ret < size)) { ret = -ENXIO; - pr_warning("short header read for image %s" - " (want %zd got %d)\n", - rbd_dev->spec->image_name, size, ret); + rbd_warn(rbd_dev, "short header read (want %zd got %d)", + size, ret); goto out_err; } if (!rbd_dev_ondisk_valid(ondisk)) { ret = -ENXIO; - pr_warning("invalid header for image %s\n", - rbd_dev->spec->image_name); + rbd_warn(rbd_dev, "invalid header"); goto out_err; } @@ -1895,8 +2359,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - /* init rq */ - q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); + q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); if (!q) goto out_disk; @@ -2233,7 +2696,7 @@ static void rbd_spec_free(struct kref *kref) kfree(spec); } -struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, +static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, struct rbd_spec *spec) { struct rbd_device *rbd_dev; @@ -2243,6 +2706,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; spin_lock_init(&rbd_dev->lock); + rbd_dev->flags = 0; INIT_LIST_HEAD(&rbd_dev->node); INIT_LIST_HEAD(&rbd_dev->snaps); init_rwsem(&rbd_dev->header_rwsem); @@ -2250,6 +2714,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, rbd_dev->spec = spec; rbd_dev->rbd_client = rbdc; + /* Initialize the layout used for all rbd requests */ + + rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); + rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); + return rbd_dev; } @@ -2360,12 +2831,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, __le64 size; } __attribute__ ((packed)) size_buf = { 0 }; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_size", (char *) &snapid, sizeof (snapid), - (char *) &size_buf, sizeof (size_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) &size_buf, sizeof (size_buf), NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2396,15 +2866,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_object_prefix", NULL, 0, - reply_buf, RBD_OBJ_PREFIX_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, @@ -2435,12 +2903,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 incompat; int ret; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_features", (char *) &snapid, sizeof (snapid), (char *) &features_buf, sizeof (features_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2474,7 +2942,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) void *end; char *image_id; u64 overlap; - size_t len = 0; int ret; parent_spec = rbd_spec_alloc(); @@ -2492,12 +2959,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } snapid = cpu_to_le64(CEPH_NOSNAP); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_parent", (char *) &snapid, sizeof (snapid), - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out_err; @@ -2508,13 +2974,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) if (parent_spec->pool_id == CEPH_NOPOOL) goto out; /* No parent? No problem. */ - image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + /* The ceph file layout needs to fit pool id in 32 bits */ + + ret = -EIO; + if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) + goto out; + + image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(image_id)) { ret = PTR_ERR(image_id); goto out_err; } parent_spec->image_id = image_id; - parent_spec->image_id_len = len; ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); ceph_decode_64_safe(&p, end, overlap, out_err); @@ -2544,26 +3015,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) rbd_assert(!rbd_dev->spec->image_name); - image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; + len = strlen(rbd_dev->spec->image_id); + image_id_size = sizeof (__le32) + len; image_id = kmalloc(image_id_size, GFP_KERNEL); if (!image_id) return NULL; p = image_id; end = (char *) image_id + image_id_size; - ceph_encode_string(&p, end, rbd_dev->spec->image_id, - (u32) rbd_dev->spec->image_id_len); + ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; reply_buf = kmalloc(size, GFP_KERNEL); if (!reply_buf) goto out; - ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, + ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, "rbd", "dir_get_name", image_id, image_id_size, - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); + (char *) reply_buf, size, NULL); if (ret < 0) goto out; p = reply_buf; @@ -2602,8 +3072,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) osdc = &rbd_dev->rbd_client->client->osdc; name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); - if (!name) - return -EIO; /* pool id too large (>= 2^31) */ + if (!name) { + rbd_warn(rbd_dev, "there is no pool with id %llu", + rbd_dev->spec->pool_id); /* Really a BUG() */ + return -EIO; + } rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); if (!rbd_dev->spec->pool_name) @@ -2612,19 +3085,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) /* Fetch the image name; tolerate failure here */ name = rbd_dev_image_name(rbd_dev); - if (name) { - rbd_dev->spec->image_name_len = strlen(name); + if (name) rbd_dev->spec->image_name = (char *) name; - } else { - pr_warning(RBD_DRV_NAME "%d " - "unable to get image name for image id %s\n", - rbd_dev->major, rbd_dev->spec->image_id); - } + else + rbd_warn(rbd_dev, "unable to get image name"); /* Look up the snapshot name. */ name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); if (!name) { + rbd_warn(rbd_dev, "no snapshot with id %llu", + rbd_dev->spec->snap_id); /* Really a BUG() */ ret = -EIO; goto out_err; } @@ -2665,12 +3136,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapcontext", NULL, 0, - reply_buf, size, - CEPH_OSD_FLAG_READ, ver); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, ver); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2735,12 +3205,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) return ERR_PTR(-ENOMEM); snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapshot_name", (char *) &snap_id, sizeof (snap_id), - reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2766,7 +3235,7 @@ out: static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, u64 *snap_size, u64 *snap_features) { - __le64 snap_id; + u64 snap_id; u8 order; int ret; @@ -2865,10 +3334,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { struct list_head *next = links->next; - /* Existing snapshot not in the new snap context */ - + /* + * A previously-existing snapshot is not in + * the new snap context. + * + * If the now missing snapshot is the one the + * image is mapped to, clear its exists flag + * so we can avoid sending any more requests + * to it. + */ if (rbd_dev->spec->snap_id == snap->id) - rbd_dev->exists = false; + clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", rbd_dev->spec->snap_id == snap->id ? @@ -2942,7 +3418,7 @@ static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) struct rbd_snap *snap; int ret = 0; - dout("%s called\n", __func__); + dout("%s:\n", __func__); if (WARN_ON(!device_is_registered(&rbd_dev->dev))) return -EIO; @@ -2983,22 +3459,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev) device_unregister(&rbd_dev->dev); } -static int rbd_init_watch_dev(struct rbd_device *rbd_dev) -{ - int ret, rc; - - do { - ret = rbd_req_sync_watch(rbd_dev); - if (ret == -ERANGE) { - rc = rbd_dev_refresh(rbd_dev, NULL); - if (rc < 0) - return rc; - } - } while (ret == -ERANGE); - - return ret; -} - static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); /* @@ -3138,11 +3598,9 @@ static inline char *dup_token(const char **buf, size_t *lenp) size_t len; len = next_token(buf); - dup = kmalloc(len + 1, GFP_KERNEL); + dup = kmemdup(*buf, len + 1, GFP_KERNEL); if (!dup) return NULL; - - memcpy(dup, *buf, len); *(dup + len) = '\0'; *buf += len; @@ -3210,8 +3668,10 @@ static int rbd_add_parse_args(const char *buf, /* The first four tokens are required */ len = next_token(&buf); - if (!len) - return -EINVAL; /* Missing monitor address(es) */ + if (!len) { + rbd_warn(NULL, "no monitor address(es) provided"); + return -EINVAL; + } mon_addrs = buf; mon_addrs_size = len + 1; buf += len; @@ -3220,8 +3680,10 @@ static int rbd_add_parse_args(const char *buf, options = dup_token(&buf, NULL); if (!options) return -ENOMEM; - if (!*options) - goto out_err; /* Missing options */ + if (!*options) { + rbd_warn(NULL, "no options provided"); + goto out_err; + } spec = rbd_spec_alloc(); if (!spec) @@ -3230,14 +3692,18 @@ static int rbd_add_parse_args(const char *buf, spec->pool_name = dup_token(&buf, NULL); if (!spec->pool_name) goto out_mem; - if (!*spec->pool_name) - goto out_err; /* Missing pool name */ + if (!*spec->pool_name) { + rbd_warn(NULL, "no pool name provided"); + goto out_err; + } - spec->image_name = dup_token(&buf, &spec->image_name_len); + spec->image_name = dup_token(&buf, NULL); if (!spec->image_name) goto out_mem; - if (!*spec->image_name) - goto out_err; /* Missing image name */ + if (!*spec->image_name) { + rbd_warn(NULL, "no image name provided"); + goto out_err; + } /* * Snapshot name is optional; default is to use "-" @@ -3251,10 +3717,9 @@ static int rbd_add_parse_args(const char *buf, ret = -ENAMETOOLONG; goto out_err; } - spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); if (!spec->snap_name) goto out_mem; - memcpy(spec->snap_name, buf, len); *(spec->snap_name + len) = '\0'; /* Initialize all rbd options to the defaults */ @@ -3323,7 +3788,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. */ - size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; + size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); object_name = kmalloc(size, GFP_NOIO); if (!object_name) return -ENOMEM; @@ -3339,21 +3804,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) goto out; } - ret = rbd_req_sync_exec(rbd_dev, object_name, + ret = rbd_obj_method_sync(rbd_dev, object_name, "rbd", "get_id", NULL, 0, - response, RBD_IMAGE_ID_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + response, RBD_IMAGE_ID_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, p + RBD_IMAGE_ID_LEN_MAX, - &rbd_dev->spec->image_id_len, - GFP_NOIO); + NULL, GFP_NOIO); if (IS_ERR(rbd_dev->spec->image_id)) { ret = PTR_ERR(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; @@ -3377,11 +3839,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); if (!rbd_dev->spec->image_id) return -ENOMEM; - rbd_dev->spec->image_id_len = 0; /* Record the header object name for this rbd image. */ - size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); + size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) { ret = -ENOMEM; @@ -3427,7 +3888,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) * Image id was filled in by the caller. Record the header * object name for this rbd image. */ - size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; + size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) return -ENOMEM; @@ -3542,7 +4003,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) if (ret) goto err_out_bus; - ret = rbd_init_watch_dev(rbd_dev); + ret = rbd_dev_header_watch_sync(rbd_dev, 1); if (ret) goto err_out_bus; @@ -3638,6 +4099,13 @@ static ssize_t rbd_add(struct bus_type *bus, goto err_out_client; spec->pool_id = (u64) rc; + /* The ceph file layout needs to fit pool id in 32 bits */ + + if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { + rc = -EIO; + goto err_out_client; + } + rbd_dev = rbd_dev_create(rbdc, spec); if (!rbd_dev) goto err_out_client; @@ -3691,15 +4159,8 @@ static void rbd_dev_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - if (rbd_dev->watch_request) { - struct ceph_client *client = rbd_dev->rbd_client->client; - - ceph_osdc_unregister_linger_request(&client->osdc, - rbd_dev->watch_request); - } if (rbd_dev->watch_event) - rbd_req_sync_unwatch(rbd_dev); - + rbd_dev_header_watch_sync(rbd_dev, 0); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); @@ -3743,10 +4204,14 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - if (rbd_dev->open_count) { + spin_lock_irq(&rbd_dev->lock); + if (rbd_dev->open_count) ret = -EBUSY; + else + set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); + spin_unlock_irq(&rbd_dev->lock); + if (ret < 0) goto done; - } rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); @@ -3782,10 +4247,15 @@ static void rbd_sysfs_cleanup(void) device_unregister(&rbd_root_dev); } -int __init rbd_init(void) +static int __init rbd_init(void) { int rc; + if (!libceph_compatible(NULL)) { + rbd_warn(NULL, "libceph incompatibility (quitting)"); + + return -EINVAL; + } rc = rbd_sysfs_init(); if (rc) return rc; @@ -3793,7 +4263,7 @@ int __init rbd_init(void) return 0; } -void __exit rbd_exit(void) +static void __exit rbd_exit(void) { rbd_sysfs_cleanup(); } diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile new file mode 100644 index 0000000..f35cd0b --- /dev/null +++ b/drivers/block/rsxx/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o +rsxx-y := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c new file mode 100644 index 0000000..a295e7e --- /dev/null +++ b/drivers/block/rsxx/config.c @@ -0,0 +1,213 @@ +/* +* Filename: config.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/types.h> +#include <linux/crc32.h> +#include <linux/swab.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +static void initialize_config(void *config) +{ + struct rsxx_card_cfg *cfg = config; + + cfg->hdr.version = RSXX_CFG_VERSION; + + cfg->data.block_size = RSXX_HW_BLK_SIZE; + cfg->data.stripe_size = RSXX_HW_BLK_SIZE; + cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM; + cfg->data.cache_order = (-1); + cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; + cfg->data.intr_coal.count = 0; + cfg->data.intr_coal.latency = 0; +} + +static u32 config_data_crc32(struct rsxx_card_cfg *cfg) +{ + /* + * Return the compliment of the CRC to ensure compatibility + * (i.e. this is how early rsxx drivers did it.) + */ + + return ~crc32(~0, &cfg->data, sizeof(cfg->data)); +} + + +/*----------------- Config Byte Swap Functions -------------------*/ +static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr) +{ + hdr->version = be32_to_cpu((__force __be32) hdr->version); + hdr->crc = be32_to_cpu((__force __be32) hdr->crc); +} + +static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr) +{ + hdr->version = (__force u32) cpu_to_be32(hdr->version); + hdr->crc = (__force u32) cpu_to_be32(hdr->crc); +} + +static void config_data_swab(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = swab32(data[i]); +} + +static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = le32_to_cpu((__force __le32) data[i]); +} + +static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = (__force u32) cpu_to_le32(data[i]); +} + + +/*----------------- Config Operations ------------------*/ +static int rsxx_save_config(struct rsxx_cardinfo *card) +{ + struct rsxx_card_cfg cfg; + int st; + + memcpy(&cfg, &card->config, sizeof(cfg)); + + if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) { + dev_err(CARD_TO_DEV(card), + "Cannot save config with invalid version %d\n", + cfg.hdr.version); + return -EINVAL; + } + + /* Convert data to little endian for the CRC calculation. */ + config_data_cpu_to_le(&cfg); + + cfg.hdr.crc = config_data_crc32(&cfg); + + /* + * Swap the data from little endian to big endian so it can be + * stored. + */ + config_data_swab(&cfg); + config_hdr_cpu_to_be(&cfg.hdr); + + st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1); + if (st) + return st; + + return 0; +} + +int rsxx_load_config(struct rsxx_cardinfo *card) +{ + int st; + u32 crc; + + st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config), + &card->config, 1); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed reading card config.\n"); + return st; + } + + config_hdr_be_to_cpu(&card->config.hdr); + + if (card->config.hdr.version == RSXX_CFG_VERSION) { + /* + * We calculate the CRC with the data in little endian, because + * early drivers did not take big endian CPUs into account. + * The data is always stored in big endian, so we need to byte + * swap it before calculating the CRC. + */ + + config_data_swab(&card->config); + + /* Check the CRC */ + crc = config_data_crc32(&card->config); + if (crc != card->config.hdr.crc) { + dev_err(CARD_TO_DEV(card), + "Config corruption detected!\n"); + dev_info(CARD_TO_DEV(card), + "CRC (sb x%08x is x%08x)\n", + card->config.hdr.crc, crc); + return -EIO; + } + + /* Convert the data to CPU byteorder */ + config_data_le_to_cpu(&card->config); + + } else if (card->config.hdr.version != 0) { + dev_err(CARD_TO_DEV(card), + "Invalid config version %d.\n", + card->config.hdr.version); + /* + * Config version changes require special handling from the + * user + */ + return -EINVAL; + } else { + dev_info(CARD_TO_DEV(card), + "Initializing card configuration.\n"); + initialize_config(card); + st = rsxx_save_config(card); + if (st) + return st; + } + + card->config_valid = 1; + + dev_dbg(CARD_TO_DEV(card), "version: x%08x\n", + card->config.hdr.version); + dev_dbg(CARD_TO_DEV(card), "crc: x%08x\n", + card->config.hdr.crc); + dev_dbg(CARD_TO_DEV(card), "block_size: x%08x\n", + card->config.data.block_size); + dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n", + card->config.data.stripe_size); + dev_dbg(CARD_TO_DEV(card), "vendor_id: x%08x\n", + card->config.data.vendor_id); + dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n", + card->config.data.cache_order); + dev_dbg(CARD_TO_DEV(card), "mode: x%08x\n", + card->config.data.intr_coal.mode); + dev_dbg(CARD_TO_DEV(card), "count: x%08x\n", + card->config.data.intr_coal.count); + dev_dbg(CARD_TO_DEV(card), "latency: x%08x\n", + card->config.data.intr_coal.latency); + + return 0; +} + diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c new file mode 100644 index 0000000..e516248 --- /dev/null +++ b/drivers/block/rsxx/core.c @@ -0,0 +1,649 @@ +/* +* Filename: core.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/bitops.h> + +#include <linux/genhd.h> +#include <linux/idr.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +#define NO_LEGACY 0 + +MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); +MODULE_AUTHOR("IBM <support@ramsan.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static unsigned int force_legacy = NO_LEGACY; +module_param(force_legacy, uint, 0444); +MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts"); + +static DEFINE_IDA(rsxx_disk_ida); +static DEFINE_SPINLOCK(rsxx_ida_lock); + +/*----------------- Interrupt Control & Handling -------------------*/ +static void __enable_intr(unsigned int *mask, unsigned int intr) +{ + *mask |= intr; +} + +static void __disable_intr(unsigned int *mask, unsigned int intr) +{ + *mask &= ~intr; +} + +/* + * NOTE: Disabling the IER will disable the hardware interrupt. + * Disabling the ISR will disable the software handling of the ISR bit. + * + * Enable/Disable interrupt functions assume the card->irq_lock + * is held by the caller. + */ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->isr_mask, intr); + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + __disable_intr(&card->isr_mask, intr); + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +static irqreturn_t rsxx_isr(int irq, void *pdata) +{ + struct rsxx_cardinfo *card = pdata; + unsigned int isr; + int handled = 0; + int reread_isr; + int i; + + spin_lock(&card->irq_lock); + + do { + reread_isr = 0; + + isr = ioread32(card->regmap + ISR); + if (isr == 0xffffffff) { + /* + * A few systems seem to have an intermittent issue + * where PCI reads return all Fs, but retrying the read + * a little later will return as expected. + */ + dev_info(CARD_TO_DEV(card), + "ISR = 0xFFFFFFFF, retrying later\n"); + break; + } + + isr &= card->isr_mask; + if (!isr) + break; + + for (i = 0; i < card->n_targets; i++) { + if (isr & CR_INTR_DMA(i)) { + if (card->ier_mask & CR_INTR_DMA(i)) { + rsxx_disable_ier(card, CR_INTR_DMA(i)); + reread_isr = 1; + } + queue_work(card->ctrl[i].done_wq, + &card->ctrl[i].dma_done_work); + handled++; + } + } + + if (isr & CR_INTR_CREG) { + schedule_work(&card->creg_ctrl.done_work); + handled++; + } + + if (isr & CR_INTR_EVENT) { + schedule_work(&card->event_work); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + handled++; + } + } while (reread_isr); + + spin_unlock(&card->irq_lock); + + return handled ? IRQ_HANDLED : IRQ_NONE; +} + +/*----------------- Card Event Handler -------------------*/ +static char *rsxx_card_state_to_str(unsigned int state) +{ + static char *state_strings[] = { + "Unknown", "Shutdown", "Starting", "Formatting", + "Uninitialized", "Good", "Shutting Down", + "Fault", "Read Only Fault", "dStroying" + }; + + return state_strings[ffs(state)]; +} + +static void card_state_change(struct rsxx_cardinfo *card, + unsigned int new_state) +{ + int st; + + dev_info(CARD_TO_DEV(card), + "card state change detected.(%s -> %s)\n", + rsxx_card_state_to_str(card->state), + rsxx_card_state_to_str(new_state)); + + card->state = new_state; + + /* Don't attach DMA interfaces if the card has an invalid config */ + if (!card->config_valid) + return; + + switch (new_state) { + case CARD_STATE_RD_ONLY_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware has entered read-only mode!\n"); + /* + * Fall through so the DMA devices can be attached and + * the user can attempt to pull off their data. + */ + case CARD_STATE_GOOD: + st = rsxx_get_card_size8(card, &card->size8); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed attaching DMA devices\n"); + + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + break; + + case CARD_STATE_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware Fault reported!\n"); + /* Fall through. */ + + /* Everything else, detach DMA interface if it's attached. */ + case CARD_STATE_SHUTDOWN: + case CARD_STATE_STARTING: + case CARD_STATE_FORMATTING: + case CARD_STATE_UNINITIALIZED: + case CARD_STATE_SHUTTING_DOWN: + /* + * dStroy is a term coined by marketing to represent the low level + * secure erase. + */ + case CARD_STATE_DSTROYING: + set_capacity(card->gendisk, 0); + break; + } +} + +static void card_event_handler(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + unsigned int state; + unsigned long flags; + int st; + + card = container_of(work, struct rsxx_cardinfo, event_work); + + if (unlikely(card->halt)) + return; + + /* + * Enable the interrupt now to avoid any weird race conditions where a + * state change might occur while rsxx_get_card_state() is + * processing a returned creg cmd. + */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + st = rsxx_get_card_state(card, &state); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed reading state after event.\n"); + return; + } + + if (card->state != state) + card_state_change(card, state); + + if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +/*----------------- Card Operations -------------------*/ +static int card_shutdown(struct rsxx_cardinfo *card) +{ + unsigned int state; + signed long start; + const int timeout = msecs_to_jiffies(120000); + int st; + + /* We can't issue a shutdown if the card is in a transition state */ + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state == CARD_STATE_STARTING && + (jiffies - start < timeout)); + + if (state == CARD_STATE_STARTING) + return -ETIMEDOUT; + + /* Only issue a shutdown if we need to */ + if ((state != CARD_STATE_SHUTTING_DOWN) && + (state != CARD_STATE_SHUTDOWN)) { + st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN); + if (st) + return st; + } + + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state != CARD_STATE_SHUTDOWN && + (jiffies - start < timeout)); + + if (state != CARD_STATE_SHUTDOWN) + return -ETIMEDOUT; + + return 0; +} + +/*----------------- Driver Initialization & Setup -------------------*/ +/* Returns: 0 if the driver is compatible with the device + -1 if the driver is NOT compatible with the device */ +static int rsxx_compatibility_check(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + if (pci_rev > RS70_PCI_REV_SUPPORTED) + return -1; + return 0; +} + +static int rsxx_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct rsxx_cardinfo *card; + int st; + + dev_info(&dev->dev, "PCI-Flash SSD discovered\n"); + + card = kzalloc(sizeof(*card), GFP_KERNEL); + if (!card) + return -ENOMEM; + + card->dev = dev; + pci_set_drvdata(dev, card); + + do { + if (!ida_pre_get(&rsxx_disk_ida, GFP_KERNEL)) { + st = -ENOMEM; + goto failed_ida_get; + } + + spin_lock(&rsxx_ida_lock); + st = ida_get_new(&rsxx_disk_ida, &card->disk_id); + spin_unlock(&rsxx_ida_lock); + } while (st == -EAGAIN); + + if (st) + goto failed_ida_get; + + st = pci_enable_device(dev); + if (st) + goto failed_enable; + + pci_set_master(dev); + pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE); + + st = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (st) { + dev_err(CARD_TO_DEV(card), + "No usable DMA configuration,aborting\n"); + goto failed_dma_mask; + } + + st = pci_request_regions(dev, DRIVER_NAME); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed to request memory region\n"); + goto failed_request_regions; + } + + if (pci_resource_len(dev, 0) == 0) { + dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n"); + st = -ENOMEM; + goto failed_iomap; + } + + card->regmap = pci_iomap(dev, 0, 0); + if (!card->regmap) { + dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n"); + st = -ENOMEM; + goto failed_iomap; + } + + spin_lock_init(&card->irq_lock); + card->halt = 0; + + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + + if (!force_legacy) { + st = pci_enable_msi(dev); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed to enable MSI\n"); + } + + st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED, + DRIVER_NAME, card); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed requesting IRQ%d\n", dev->irq); + goto failed_irq; + } + + /************* Setup Processor Command Interface *************/ + rsxx_creg_setup(card); + + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG); + spin_unlock_irq(&card->irq_lock); + + st = rsxx_compatibility_check(card); + if (st) { + dev_warn(CARD_TO_DEV(card), + "Incompatible driver detected. Please update the driver.\n"); + st = -EINVAL; + goto failed_compatiblity_check; + } + + /************* Load Card Config *************/ + st = rsxx_load_config(card); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed loading card config\n"); + + /************* Setup DMA Engine *************/ + st = rsxx_get_num_targets(card, &card->n_targets); + if (st) + dev_info(CARD_TO_DEV(card), + "Failed reading the number of DMA targets\n"); + + card->ctrl = kzalloc(card->n_targets * sizeof(*card->ctrl), GFP_KERNEL); + if (!card->ctrl) { + st = -ENOMEM; + goto failed_dma_setup; + } + + st = rsxx_dma_setup(card); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed to setup DMA engine\n"); + goto failed_dma_setup; + } + + /************* Setup Card Event Handler *************/ + INIT_WORK(&card->event_work, card_event_handler); + + st = rsxx_setup_dev(card); + if (st) + goto failed_create_dev; + + rsxx_get_card_state(card, &card->state); + + dev_info(CARD_TO_DEV(card), + "card state: %s\n", + rsxx_card_state_to_str(card->state)); + + /* + * Now that the DMA Engine and devices have been setup, + * we can enable the event interrupt(it kicks off actions in + * those layers so we couldn't enable it right away.) + */ + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irq(&card->irq_lock); + + if (card->state == CARD_STATE_SHUTDOWN) { + st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP); + if (st) + dev_crit(CARD_TO_DEV(card), + "Failed issuing card startup\n"); + } else if (card->state == CARD_STATE_GOOD || + card->state == CARD_STATE_RD_ONLY_FAULT) { + st = rsxx_get_card_size8(card, &card->size8); + if (st) + card->size8 = 0; + } + + rsxx_attach_dev(card); + + return 0; + +failed_create_dev: + rsxx_dma_destroy(card); +failed_dma_setup: +failed_compatiblity_check: + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + free_irq(dev->irq, card); + if (!force_legacy) + pci_disable_msi(dev); +failed_irq: + pci_iounmap(dev, card->regmap); +failed_iomap: + pci_release_regions(dev); +failed_request_regions: +failed_dma_mask: + pci_disable_device(dev); +failed_enable: + spin_lock(&rsxx_ida_lock); + ida_remove(&rsxx_disk_ida, card->disk_id); + spin_unlock(&rsxx_ida_lock); +failed_ida_get: + kfree(card); + + return st; +} + +static void rsxx_pci_remove(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int st; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), + "Removing PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + st = card_shutdown(card); + if (st) + dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n"); + + /* Sync outstanding event handlers. */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + /* Prevent work_structs from re-queuing themselves. */ + card->halt = 1; + + cancel_work_sync(&card->event_work); + + rsxx_destroy_dev(card); + rsxx_dma_destroy(card); + + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irqrestore(&card->irq_lock, flags); + free_irq(dev->irq, card); + + if (!force_legacy) + pci_disable_msi(dev); + + rsxx_creg_destroy(card); + + pci_iounmap(dev, card->regmap); + + pci_disable_device(dev); + pci_release_regions(dev); + + kfree(card); +} + +static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state) +{ + /* We don't support suspend at this time. */ + return -ENOSYS; +} + +static void rsxx_pci_shutdown(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + card_shutdown(card); +} + +static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, + {0,}, +}; + +MODULE_DEVICE_TABLE(pci, rsxx_pci_ids); + +static struct pci_driver rsxx_pci_driver = { + .name = DRIVER_NAME, + .id_table = rsxx_pci_ids, + .probe = rsxx_pci_probe, + .remove = rsxx_pci_remove, + .suspend = rsxx_pci_suspend, + .shutdown = rsxx_pci_shutdown, +}; + +static int __init rsxx_core_init(void) +{ + int st; + + st = rsxx_dev_init(); + if (st) + return st; + + st = rsxx_dma_init(); + if (st) + goto dma_init_failed; + + st = rsxx_creg_init(); + if (st) + goto creg_init_failed; + + return pci_register_driver(&rsxx_pci_driver); + +creg_init_failed: + rsxx_dma_cleanup(); +dma_init_failed: + rsxx_dev_cleanup(); + + return st; +} + +static void __exit rsxx_core_cleanup(void) +{ + pci_unregister_driver(&rsxx_pci_driver); + rsxx_creg_cleanup(); + rsxx_dma_cleanup(); + rsxx_dev_cleanup(); +} + +module_init(rsxx_core_init); +module_exit(rsxx_core_cleanup); diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c new file mode 100644 index 0000000..80bbe63 --- /dev/null +++ b/drivers/block/rsxx/cregs.c @@ -0,0 +1,758 @@ +/* +* Filename: cregs.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/completion.h> +#include <linux/slab.h> + +#include "rsxx_priv.h" + +#define CREG_TIMEOUT_MSEC 10000 + +typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st); + +struct creg_cmd { + struct list_head list; + creg_cmd_cb cb; + void *cb_private; + unsigned int op; + unsigned int addr; + int cnt8; + void *buf; + unsigned int stream; + unsigned int status; +}; + +static struct kmem_cache *creg_cmd_pool; + + +/*------------ Private Functions --------------*/ + +#if defined(__LITTLE_ENDIAN) +#define LITTLE_ENDIAN 1 +#elif defined(__BIG_ENDIAN) +#define LITTLE_ENDIAN 0 +#else +#error Unknown endianess!!! Aborting... +#endif + +static void copy_to_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + iowrite32be(data[i], card->regmap + CREG_DATA(i)); + else + iowrite32(data[i], card->regmap + CREG_DATA(i)); + } +} + + +static void copy_from_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + data[i] = ioread32be(card->regmap + CREG_DATA(i)); + else + data[i] = ioread32(card->regmap + CREG_DATA(i)); + } +} + +static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + + /* + * Spin lock is needed because this can be called in atomic/interrupt + * context. + */ + spin_lock_bh(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock_bh(&card->creg_ctrl.lock); + + return cmd; +} + +static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) +{ + iowrite32(cmd->addr, card->regmap + CREG_ADD); + iowrite32(cmd->cnt8, card->regmap + CREG_CNT); + + if (cmd->op == CREG_OP_WRITE) { + if (cmd->buf) + copy_to_creg_data(card, cmd->cnt8, + cmd->buf, cmd->stream); + } + + /* + * Data copy must complete before initiating the command. This is + * needed for weakly ordered processors (i.e. PowerPC), so that all + * neccessary registers are written before we kick the hardware. + */ + wmb(); + + /* Setting the valid bit will kick off the command. */ + iowrite32(cmd->op, card->regmap + CREG_CMD); +} + +static void creg_kick_queue(struct rsxx_cardinfo *card) +{ + if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue)) + return; + + card->creg_ctrl.active = 1; + card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue, + struct creg_cmd, list); + list_del(&card->creg_ctrl.active_cmd->list); + card->creg_ctrl.q_depth--; + + /* + * We have to set the timer before we push the new command. Otherwise, + * we could create a race condition that would occur if the timer + * was not canceled, and expired after the new command was pushed, + * but before the command was issued to hardware. + */ + mod_timer(&card->creg_ctrl.cmd_timer, + jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC)); + + creg_issue_cmd(card, card->creg_ctrl.active_cmd); +} + +static int creg_queue_cmd(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + creg_cmd_cb callback, + void *cb_private) +{ + struct creg_cmd *cmd; + + /* Don't queue stuff up if we're halted. */ + if (unlikely(card->halt)) + return -EINVAL; + + if (card->creg_ctrl.reset) + return -EAGAIN; + + if (cnt8 > MAX_CREG_DATA8) + return -EINVAL; + + cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + INIT_LIST_HEAD(&cmd->list); + + cmd->op = op; + cmd->addr = addr; + cmd->cnt8 = cnt8; + cmd->buf = buf; + cmd->stream = stream; + cmd->cb = callback; + cmd->cb_private = cb_private; + cmd->status = 0; + + spin_lock(&card->creg_ctrl.lock); + list_add_tail(&cmd->list, &card->creg_ctrl.queue); + card->creg_ctrl.q_depth++; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); + + return 0; +} + +static void creg_cmd_timed_out(unsigned long data) +{ + struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data; + struct creg_cmd *cmd; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + card->creg_ctrl.creg_stats.creg_timeout++; + dev_warn(CARD_TO_DEV(card), + "No active command associated with timeout!\n"); + return; + } + + if (cmd->cb) + cmd->cb(card, cmd, -ETIMEDOUT); + + kmem_cache_free(creg_cmd_pool, cmd); + + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + + +static void creg_cmd_done(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + struct creg_cmd *cmd; + int st = 0; + + card = container_of(work, struct rsxx_cardinfo, + creg_ctrl.done_work); + + /* + * The timer could not be cancelled for some reason, + * race to pop the active command. + */ + if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) + card->creg_ctrl.creg_stats.failed_cancel_timer++; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + dev_err(CARD_TO_DEV(card), + "Spurious creg interrupt!\n"); + return; + } + + card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT); + cmd->status = card->creg_ctrl.creg_stats.stat; + if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) { + dev_err(CARD_TO_DEV(card), + "Invalid status on creg command\n"); + /* + * At this point we're probably reading garbage from HW. Don't + * do anything else that could mess up the system and let + * the sync function return an error. + */ + st = -EIO; + goto creg_done; + } else if (cmd->status & CREG_STAT_ERROR) { + st = -EIO; + } + + if ((cmd->op == CREG_OP_READ)) { + unsigned int cnt8 = ioread32(card->regmap + CREG_CNT); + + /* Paranoid Sanity Checks */ + if (!cmd->buf) { + dev_err(CARD_TO_DEV(card), + "Buffer not given for read.\n"); + st = -EIO; + goto creg_done; + } + if (cnt8 != cmd->cnt8) { + dev_err(CARD_TO_DEV(card), + "count mismatch\n"); + st = -EIO; + goto creg_done; + } + + copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); + } + +creg_done: + if (cmd->cb) + cmd->cb(card, cmd, st); + + kmem_cache_free(creg_cmd_pool, cmd); + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + +static void creg_reset(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd = NULL; + struct creg_cmd *tmp; + unsigned long flags; + + /* + * mutex_trylock is used here because if reset_lock is taken then a + * reset is already happening. So, we can just go ahead and return. + */ + if (!mutex_trylock(&card->creg_ctrl.reset_lock)) + return; + + card->creg_ctrl.reset = 1; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + dev_warn(CARD_TO_DEV(card), + "Resetting creg interface for recovery\n"); + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + card->creg_ctrl.q_depth--; + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + } + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + + card->creg_ctrl.active = 0; + } + spin_unlock(&card->creg_ctrl.lock); + + card->creg_ctrl.reset = 0; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + mutex_unlock(&card->creg_ctrl.reset_lock); +} + +/* Used for synchronous accesses */ +struct creg_completion { + struct completion *cmd_done; + int st; + u32 creg_status; +}; + +static void creg_cmd_done_cb(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + struct creg_completion *cmd_completion; + + cmd_completion = cmd->cb_private; + BUG_ON(!cmd_completion); + + cmd_completion->st = st; + cmd_completion->creg_status = cmd->status; + complete(cmd_completion->cmd_done); +} + +static int __issue_creg_rw(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + unsigned int *hw_stat) +{ + DECLARE_COMPLETION_ONSTACK(cmd_done); + struct creg_completion completion; + unsigned long timeout; + int st; + + completion.cmd_done = &cmd_done; + completion.st = 0; + completion.creg_status = 0; + + st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb, + &completion); + if (st) + return st; + + /* + * This timeout is neccessary for unresponsive hardware. The additional + * 20 seconds to used to guarantee that each cregs requests has time to + * complete. + */ + timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * + card->creg_ctrl.q_depth) + 20000); + + /* + * The creg interface is guaranteed to complete. It has a timeout + * mechanism that will kick in if hardware does not respond. + */ + st = wait_for_completion_timeout(completion.cmd_done, timeout); + if (st == 0) { + /* + * This is really bad, because the kernel timer did not + * expire and notify us of a timeout! + */ + dev_crit(CARD_TO_DEV(card), + "cregs timer failed\n"); + creg_reset(card); + return -EIO; + } + + *hw_stat = completion.creg_status; + + if (completion.st) { + dev_warn(CARD_TO_DEV(card), + "creg command failed(%d x%08x)\n", + completion.st, addr); + return completion.st; + } + + return 0; +} + +static int issue_creg_rw(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int stream, + int read) +{ + unsigned int hw_stat; + unsigned int xfer; + unsigned int op; + int st; + + op = read ? CREG_OP_READ : CREG_OP_WRITE; + + do { + xfer = min_t(unsigned int, size8, MAX_CREG_DATA8); + + st = __issue_creg_rw(card, op, addr, xfer, + data, stream, &hw_stat); + if (st) + return st; + + data = (char *)data + xfer; + addr += xfer; + size8 -= xfer; + } while (size8); + + return 0; +} + +/* ---------------------------- Public API ---------------------------------- */ +int rsxx_creg_write(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 0); +} + +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 1); +} + +int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state) +{ + return rsxx_creg_read(card, CREG_ADD_CARD_STATE, + sizeof(*state), state, 0); +} + +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8) +{ + unsigned int size; + int st; + + st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE, + sizeof(size), &size, 0); + if (st) + return st; + + *size8 = (u64)size * RSXX_HW_BLK_SIZE; + return 0; +} + +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets) +{ + return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS, + sizeof(*n_targets), n_targets, 0); +} + +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities) +{ + return rsxx_creg_read(card, CREG_ADD_CAPABILITIES, + sizeof(*capabilities), capabilities, 0); +} + +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd) +{ + return rsxx_creg_write(card, CREG_ADD_CARD_CMD, + sizeof(cmd), &cmd, 0); +} + + +/*----------------- HW Log Functions -------------------*/ +static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len) +{ + static char level; + + /* + * New messages start with "<#>", where # is the log level. Messages + * that extend past the log buffer will use the previous level + */ + if ((len > 3) && (str[0] == '<') && (str[2] == '>')) { + level = str[1]; + str += 3; /* Skip past the log level. */ + len -= 3; + } + + switch (level) { + case '0': + dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '1': + dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '2': + dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '3': + dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '4': + dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '5': + dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '6': + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '7': + dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + default: + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + } +} + +/* + * The substrncpy function copies the src string (which includes the + * terminating '\0' character), up to the count into the dest pointer. + * Returns the number of bytes copied to dest. + */ +static int substrncpy(char *dest, const char *src, int count) +{ + int max_cnt = count; + + while (count) { + count--; + *dest = *src; + if (*dest == '\0') + break; + src++; + dest++; + } + return max_cnt - count; +} + + +static void read_hw_log_done(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + char *buf; + char *log_str; + int cnt; + int len; + int off; + + buf = cmd->buf; + off = 0; + + /* Failed getting the log message */ + if (st) + return; + + while (off < cmd->cnt8) { + log_str = &card->log.buf[card->log.buf_len]; + cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len); + len = substrncpy(log_str, &buf[off], cnt); + + off += len; + card->log.buf_len += len; + + /* + * Flush the log if we've hit the end of a message or if we've + * run out of buffer space. + */ + if ((log_str[len - 1] == '\0') || + (card->log.buf_len == LOG_BUF_SIZE8)) { + if (card->log.buf_len != 1) /* Don't log blank lines. */ + hw_log_msg(card, card->log.buf, + card->log.buf_len); + card->log.buf_len = 0; + } + + } + + if (cmd->status & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +int rsxx_read_hw_log(struct rsxx_cardinfo *card) +{ + int st; + + st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG, + sizeof(card->log.tmp), card->log.tmp, + 1, read_hw_log_done, NULL); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed getting log text\n"); + + return st; +} + +/*-------------- IOCTL REG Access ------------------*/ +static int issue_reg_cmd(struct rsxx_cardinfo *card, + struct rsxx_reg_access *cmd, + int read) +{ + unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE; + + return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data, + cmd->stream, &cmd->stat); +} + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read) +{ + struct rsxx_reg_access cmd; + int st; + + st = copy_from_user(&cmd, ucmd, sizeof(cmd)); + if (st) + return -EFAULT; + + if (cmd.cnt > RSXX_MAX_REG_CNT) + return -EFAULT; + + st = issue_reg_cmd(card, &cmd, read); + if (st) + return st; + + st = put_user(cmd.stat, &ucmd->stat); + if (st) + return -EFAULT; + + if (read) { + st = copy_to_user(ucmd->data, cmd.data, cmd.cnt); + if (st) + return -EFAULT; + } + + return 0; +} + +/*------------ Initialization & Setup --------------*/ +int rsxx_creg_setup(struct rsxx_cardinfo *card) +{ + card->creg_ctrl.active_cmd = NULL; + + INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done); + mutex_init(&card->creg_ctrl.reset_lock); + INIT_LIST_HEAD(&card->creg_ctrl.queue); + spin_lock_init(&card->creg_ctrl.lock); + setup_timer(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out, + (unsigned long) card); + + return 0; +} + +void rsxx_creg_destroy(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + struct creg_cmd *tmp; + int cnt = 0; + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + cnt++; + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Canceled %d queue creg commands\n", cnt); + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + dev_info(CARD_TO_DEV(card), + "Canceled active creg command\n"); + kmem_cache_free(creg_cmd_pool, cmd); + } + spin_unlock(&card->creg_ctrl.lock); + + cancel_work_sync(&card->creg_ctrl.done_work); +} + + +int rsxx_creg_init(void) +{ + creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN); + if (!creg_cmd_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_creg_cleanup(void) +{ + kmem_cache_destroy(creg_cmd_pool); +} diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c new file mode 100644 index 0000000..4346d17 --- /dev/null +++ b/drivers/block/rsxx/dev.c @@ -0,0 +1,367 @@ +/* +* Filename: dev.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/slab.h> + +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/blkdev.h> +#include <linux/bio.h> + +#include <linux/fs.h> + +#include "rsxx_priv.h" + +static unsigned int blkdev_minors = 64; +module_param(blkdev_minors, uint, 0444); +MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)"); + +/* + * For now I'm making this tweakable in case any applications hit this limit. + * If you see a "bio too big" error in the log you will need to raise this + * value. + */ +static unsigned int blkdev_max_hw_sectors = 1024; +module_param(blkdev_max_hw_sectors, uint, 0444); +MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO"); + +static unsigned int enable_blkdev = 1; +module_param(enable_blkdev , uint, 0444); +MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces"); + + +struct rsxx_bio_meta { + struct bio *bio; + atomic_t pending_dmas; + atomic_t error; + unsigned long start_time; +}; + +static struct kmem_cache *bio_meta_pool; + +/*----------------- Block Device Operations -----------------*/ +static int rsxx_blkdev_ioctl(struct block_device *bdev, + fmode_t mode, + unsigned int cmd, + unsigned long arg) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + + switch (cmd) { + case RSXX_GETREG: + return rsxx_reg_access(card, (void __user *)arg, 1); + case RSXX_SETREG: + return rsxx_reg_access(card, (void __user *)arg, 0); + } + + return -ENOTTY; +} + +static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + u64 blocks = card->size8 >> 9; + + /* + * get geometry: Fake it. I haven't found any drivers that set + * geo->start, so we won't either. + */ + if (card->size8) { + geo->heads = 64; + geo->sectors = 16; + do_div(blocks, (geo->heads * geo->sectors)); + geo->cylinders = blocks; + } else { + geo->heads = 0; + geo->sectors = 0; + geo->cylinders = 0; + } + return 0; +} + +static const struct block_device_operations rsxx_fops = { + .owner = THIS_MODULE, + .getgeo = rsxx_getgeo, + .ioctl = rsxx_blkdev_ioctl, +}; + +static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio) +{ + struct hd_struct *part0 = &card->gendisk->part0; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_round_stats(cpu, part0); + part_inc_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void disk_stats_complete(struct rsxx_cardinfo *card, + struct bio *bio, + unsigned long start_time) +{ + struct hd_struct *part0 = &card->gendisk->part0; + unsigned long duration = jiffies - start_time; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio)); + part_stat_inc(cpu, part0, ios[rw]); + part_stat_add(cpu, part0, ticks[rw], duration); + + part_round_stats(cpu, part0); + part_dec_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void bio_dma_done_cb(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int error) +{ + struct rsxx_bio_meta *meta = cb_data; + + if (error) + atomic_set(&meta->error, 1); + + if (atomic_dec_and_test(&meta->pending_dmas)) { + disk_stats_complete(card, meta->bio, meta->start_time); + + bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0); + kmem_cache_free(bio_meta_pool, meta); + } +} + +static void rsxx_make_request(struct request_queue *q, struct bio *bio) +{ + struct rsxx_cardinfo *card = q->queuedata; + struct rsxx_bio_meta *bio_meta; + int st = -EINVAL; + + might_sleep(); + + if (unlikely(card->halt)) { + st = -EFAULT; + goto req_err; + } + + if (unlikely(card->dma_fault)) { + st = (-EFAULT); + goto req_err; + } + + if (bio->bi_size == 0) { + dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); + goto req_err; + } + + bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); + if (!bio_meta) { + st = -ENOMEM; + goto req_err; + } + + bio_meta->bio = bio; + atomic_set(&bio_meta->error, 0); + atomic_set(&bio_meta->pending_dmas, 0); + bio_meta->start_time = jiffies; + + disk_stats_start(card, bio); + + dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", + bio_data_dir(bio) ? 'W' : 'R', bio_meta, + (u64)bio->bi_sector << 9, bio->bi_size); + + st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas, + bio_dma_done_cb, bio_meta); + if (st) + goto queue_err; + + return; + +queue_err: + kmem_cache_free(bio_meta_pool, bio_meta); +req_err: + bio_endio(bio, st); +} + +/*----------------- Device Setup -------------------*/ +static bool rsxx_discard_supported(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + return (pci_rev >= RSXX_DISCARD_SUPPORT); +} + +static unsigned short rsxx_get_logical_block_size( + struct rsxx_cardinfo *card) +{ + u32 capabilities = 0; + int st; + + st = rsxx_get_card_capabilities(card, &capabilities); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed reading card capabilities register\n"); + + /* Earlier firmware did not have support for 512 byte accesses */ + if (capabilities & CARD_CAP_SUBPAGE_WRITES) + return 512; + else + return RSXX_HW_BLK_SIZE; +} + +int rsxx_attach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + /* The block device requires the stripe size from the config. */ + if (enable_blkdev) { + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + else + set_capacity(card->gendisk, 0); + add_disk(card->gendisk); + + card->bdev_attached = 1; + } + + mutex_unlock(&card->dev_lock); + + return 0; +} + +void rsxx_detach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + if (card->bdev_attached) { + del_gendisk(card->gendisk); + card->bdev_attached = 0; + } + + mutex_unlock(&card->dev_lock); +} + +int rsxx_setup_dev(struct rsxx_cardinfo *card) +{ + unsigned short blk_size; + + mutex_init(&card->dev_lock); + + if (!enable_blkdev) + return 0; + + card->major = register_blkdev(0, DRIVER_NAME); + if (card->major < 0) { + dev_err(CARD_TO_DEV(card), "Failed to get major number\n"); + return -ENOMEM; + } + + card->queue = blk_alloc_queue(GFP_KERNEL); + if (!card->queue) { + dev_err(CARD_TO_DEV(card), "Failed queue alloc\n"); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + card->gendisk = alloc_disk(blkdev_minors); + if (!card->gendisk) { + dev_err(CARD_TO_DEV(card), "Failed disk alloc\n"); + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + blk_size = rsxx_get_logical_block_size(card); + + blk_queue_make_request(card->queue, rsxx_make_request); + blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); + blk_queue_dma_alignment(card->queue, blk_size - 1); + blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); + blk_queue_logical_block_size(card->queue, blk_size); + blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); + + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); + if (rsxx_discard_supported(card)) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); + blk_queue_max_discard_sectors(card->queue, + RSXX_HW_BLK_SIZE >> 9); + card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_zeroes_data = 1; + } + + card->queue->queuedata = card; + + snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), + "rsxx%d", card->disk_id); + card->gendisk->driverfs_dev = &card->dev->dev; + card->gendisk->major = card->major; + card->gendisk->first_minor = 0; + card->gendisk->fops = &rsxx_fops; + card->gendisk->private_data = card; + card->gendisk->queue = card->queue; + + return 0; +} + +void rsxx_destroy_dev(struct rsxx_cardinfo *card) +{ + if (!enable_blkdev) + return; + + put_disk(card->gendisk); + card->gendisk = NULL; + + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); +} + +int rsxx_dev_init(void) +{ + bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN); + if (!bio_meta_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_dev_cleanup(void) +{ + kmem_cache_destroy(bio_meta_pool); +} + + diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c new file mode 100644 index 0000000..63176e6 --- /dev/null +++ b/drivers/block/rsxx/dma.c @@ -0,0 +1,998 @@ +/* +* Filename: dma.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/slab.h> +#include "rsxx_priv.h" + +struct rsxx_dma { + struct list_head list; + u8 cmd; + unsigned int laddr; /* Logical address on the ramsan */ + struct { + u32 off; + u32 cnt; + } sub_page; + dma_addr_t dma_addr; + struct page *page; + unsigned int pg_off; /* Page Offset */ + rsxx_dma_cb cb; + void *cb_data; +}; + +/* This timeout is used to detect a stalled DMA channel */ +#define DMA_ACTIVITY_TIMEOUT msecs_to_jiffies(10000) + +struct hw_status { + u8 status; + u8 tag; + __le16 count; + __le32 _rsvd2; + __le64 _rsvd3; +} __packed; + +enum rsxx_dma_status { + DMA_SW_ERR = 0x1, + DMA_HW_FAULT = 0x2, + DMA_CANCELLED = 0x4, +}; + +struct hw_cmd { + u8 command; + u8 tag; + u8 _rsvd; + u8 sub_page; /* Bit[0:2]: 512byte offset */ + /* Bit[4:6]: 512byte count */ + __le32 device_addr; + __le64 host_addr; +} __packed; + +enum rsxx_hw_cmd { + HW_CMD_BLK_DISCARD = 0x70, + HW_CMD_BLK_WRITE = 0x80, + HW_CMD_BLK_READ = 0xC0, + HW_CMD_BLK_RECON_READ = 0xE0, +}; + +enum rsxx_hw_status { + HW_STATUS_CRC = 0x01, + HW_STATUS_HARD_ERR = 0x02, + HW_STATUS_SOFT_ERR = 0x04, + HW_STATUS_FAULT = 0x08, +}; + +#define STATUS_BUFFER_SIZE8 4096 +#define COMMAND_BUFFER_SIZE8 4096 + +static struct kmem_cache *rsxx_dma_pool; + +struct dma_tracker { + int next_tag; + struct rsxx_dma *dma; +}; + +#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \ + (sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS)) + +struct dma_tracker_list { + spinlock_t lock; + int head; + struct dma_tracker list[0]; +}; + + +/*----------------- Misc Utility Functions -------------------*/ +static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card) +{ + unsigned long long tgt_addr8; + + tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) & + card->_stripe.upper_mask) | + ((addr8) & card->_stripe.lower_mask); + do_div(tgt_addr8, RSXX_HW_BLK_SIZE); + return tgt_addr8; +} + +static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) +{ + unsigned int tgt; + + tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask; + + return tgt; +} + +static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +{ + /* Reset all DMA Command/Status Queues */ + iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); +} + +static unsigned int get_dma_size(struct rsxx_dma *dma) +{ + if (dma->sub_page.cnt) + return dma->sub_page.cnt << 9; + else + return RSXX_HW_BLK_SIZE; +} + + +/*----------------- DMA Tracker -------------------*/ +static void set_tracker_dma(struct dma_tracker_list *trackers, + int tag, + struct rsxx_dma *dma) +{ + trackers->list[tag].dma = dma; +} + +static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers, + int tag) +{ + return trackers->list[tag].dma; +} + +static int pop_tracker(struct dma_tracker_list *trackers) +{ + int tag; + + spin_lock(&trackers->lock); + tag = trackers->head; + if (tag != -1) { + trackers->head = trackers->list[tag].next_tag; + trackers->list[tag].next_tag = -1; + } + spin_unlock(&trackers->lock); + + return tag; +} + +static void push_tracker(struct dma_tracker_list *trackers, int tag) +{ + spin_lock(&trackers->lock); + trackers->list[tag].next_tag = trackers->head; + trackers->head = tag; + trackers->list[tag].dma = NULL; + spin_unlock(&trackers->lock); +} + + +/*----------------- Interrupt Coalescing -------------*/ +/* + * Interrupt Coalescing Register Format: + * Interrupt Timer (64ns units) [15:0] + * Interrupt Count [24:16] + * Reserved [31:25] +*/ +#define INTR_COAL_LATENCY_MASK (0x0000ffff) + +#define INTR_COAL_COUNT_SHIFT 16 +#define INTR_COAL_COUNT_BITS 9 +#define INTR_COAL_COUNT_MASK (((1 << INTR_COAL_COUNT_BITS) - 1) << \ + INTR_COAL_COUNT_SHIFT) +#define INTR_COAL_LATENCY_UNITS_NS 64 + + +static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency) +{ + u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS; + + if (mode == RSXX_INTR_COAL_DISABLED) + return 0; + + return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) | + (latency_units & INTR_COAL_LATENCY_MASK); + +} + +static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) +{ + int i; + u32 q_depth = 0; + u32 intr_coal; + + if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) + return; + + for (i = 0; i < card->n_targets; i++) + q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth); + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + q_depth / 2, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); +} + +/*----------------- RSXX DMA Handling -------------------*/ +static void rsxx_complete_dma(struct rsxx_cardinfo *card, + struct rsxx_dma *dma, + unsigned int status) +{ + if (status & DMA_SW_ERR) + printk_ratelimited(KERN_ERR + "SW Error in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_HW_FAULT) + printk_ratelimited(KERN_ERR + "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_CANCELLED) + printk_ratelimited(KERN_ERR + "DMA Cancelled(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + + if (dma->cb) + dma->cb(card, dma->cb_data, status ? 1 : 0); + + kmem_cache_free(rsxx_dma_pool, dma); +} + +static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma) +{ + /* + * Requeued DMAs go to the front of the queue so they are issued + * first. + */ + spin_lock(&ctrl->queue_lock); + list_add(&dma->list, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); +} + +static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma, + u8 hw_st) +{ + unsigned int status = 0; + int requeue_cmd = 0; + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n", + dma->cmd, dma->laddr, hw_st); + + if (hw_st & HW_STATUS_CRC) + ctrl->stats.crc_errors++; + if (hw_st & HW_STATUS_HARD_ERR) + ctrl->stats.hard_errors++; + if (hw_st & HW_STATUS_SOFT_ERR) + ctrl->stats.soft_errors++; + + switch (dma->cmd) { + case HW_CMD_BLK_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + if (ctrl->card->scrub_hard) { + dma->cmd = HW_CMD_BLK_RECON_READ; + requeue_cmd = 1; + ctrl->stats.reads_retried++; + } else { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + } else if (hw_st & HW_STATUS_FAULT) { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_RECON_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + /* Data could not be reconstructed. */ + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_WRITE: + status |= DMA_HW_FAULT; + ctrl->stats.writes_failed++; + + break; + case HW_CMD_BLK_DISCARD: + status |= DMA_HW_FAULT; + ctrl->stats.discards_failed++; + + break; + default: + dev_err(CARD_TO_DEV(ctrl->card), + "Unknown command in DMA!(cmd: x%02x " + "laddr x%08x st: x%02x\n", + dma->cmd, dma->laddr, hw_st); + status |= DMA_SW_ERR; + + break; + } + + if (requeue_cmd) + rsxx_requeue_dma(ctrl, dma); + else + rsxx_complete_dma(ctrl->card, dma, status); +} + +static void dma_engine_stalled(unsigned long data) +{ + struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + return; + + if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { + /* + * The dma engine was stalled because the SW_CMD_IDX write + * was lost. Issue it again to recover. + */ + dev_warn(CARD_TO_DEV(ctrl->card), + "SW_CMD_IDX write was lost, re-writing...\n"); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + } else { + dev_warn(CARD_TO_DEV(ctrl->card), + "DMA channel %d has stalled, faulting interface.\n", + ctrl->id); + ctrl->card->dma_fault = 1; + } +} + +static void rsxx_issue_dmas(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int tag; + int cmds_pending = 0; + struct hw_cmd *hw_cmd_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); + hw_cmd_buf = ctrl->cmd.buf; + + if (unlikely(ctrl->card->halt)) + return; + + while (1) { + spin_lock(&ctrl->queue_lock); + if (list_empty(&ctrl->queue)) { + spin_unlock(&ctrl->queue_lock); + break; + } + spin_unlock(&ctrl->queue_lock); + + tag = pop_tracker(ctrl->trackers); + if (tag == -1) + break; + + spin_lock(&ctrl->queue_lock); + dma = list_entry(ctrl->queue.next, struct rsxx_dma, list); + list_del(&dma->list); + ctrl->stats.sw_q_depth--; + spin_unlock(&ctrl->queue_lock); + + /* + * This will catch any DMAs that slipped in right before the + * fault, but was queued after all the other DMAs were + * cancelled. + */ + if (unlikely(ctrl->card->dma_fault)) { + push_tracker(ctrl->trackers, tag); + rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); + continue; + } + + set_tracker_dma(ctrl->trackers, tag, dma); + hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd; + hw_cmd_buf[ctrl->cmd.idx].tag = tag; + hw_cmd_buf[ctrl->cmd.idx]._rsvd = 0; + hw_cmd_buf[ctrl->cmd.idx].sub_page = + ((dma->sub_page.cnt & 0x7) << 4) | + (dma->sub_page.off & 0x7); + + hw_cmd_buf[ctrl->cmd.idx].device_addr = + cpu_to_le32(dma->laddr); + + hw_cmd_buf[ctrl->cmd.idx].host_addr = + cpu_to_le64(dma->dma_addr); + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Issue DMA%d(laddr %d tag %d) to idx %d\n", + ctrl->id, dma->laddr, tag, ctrl->cmd.idx); + + ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK; + cmds_pending++; + + if (dma->cmd == HW_CMD_BLK_WRITE) + ctrl->stats.writes_issued++; + else if (dma->cmd == HW_CMD_BLK_DISCARD) + ctrl->stats.discards_issued++; + else + ctrl->stats.reads_issued++; + } + + /* Let HW know we've queued commands. */ + if (cmds_pending) { + /* + * We must guarantee that the CPU writes to 'ctrl->cmd.buf' + * (which is in PCI-consistent system-memory) from the loop + * above make it into the coherency domain before the + * following PIO "trigger" updating the cmd.idx. A WMB is + * sufficient. We need not explicitly CPU cache-flush since + * the memory is a PCI-consistent (ie; coherent) mapping. + */ + wmb(); + + atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + } +} + +static void rsxx_dma_done(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + unsigned long flags; + u16 count; + u8 status; + u8 tag; + struct hw_status *hw_st_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); + hw_st_buf = ctrl->status.buf; + + if (unlikely(ctrl->card->halt) || + unlikely(ctrl->card->dma_fault)) + return; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + + while (count == ctrl->e_cnt) { + /* + * The read memory-barrier is necessary to keep aggressive + * processors/optimizers (such as the PPC Apple G5) from + * reordering the following status-buffer tag & status read + * *before* the count read on subsequent iterations of the + * loop! + */ + rmb(); + + status = hw_st_buf[ctrl->status.idx].status; + tag = hw_st_buf[ctrl->status.idx].tag; + + dma = get_tracker_dma(ctrl->trackers, tag); + if (dma == NULL) { + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + dev_err(CARD_TO_DEV(ctrl->card), + "No tracker for tag %d " + "(idx %d id %d)\n", + tag, ctrl->status.idx, ctrl->id); + return; + } + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Completing DMA%d" + "(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n", + ctrl->id, dma->laddr, tag, status, count, + ctrl->status.idx); + + atomic_dec(&ctrl->stats.hw_q_depth); + + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + + if (status) + rsxx_handle_dma_error(ctrl, dma, status); + else + rsxx_complete_dma(ctrl->card, dma, 0); + + push_tracker(ctrl->trackers, tag); + + ctrl->status.idx = (ctrl->status.idx + 1) & + RSXX_CS_IDX_MASK; + ctrl->e_cnt++; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + } + + dma_intr_coal_auto_tune(ctrl->card); + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + del_timer_sync(&ctrl->activity_timer); + + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id)); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + spin_lock(&ctrl->queue_lock); + if (ctrl->stats.sw_q_depth) + queue_work(ctrl->issue_wq, &ctrl->issue_dma_work); + spin_unlock(&ctrl->queue_lock); +} + +static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card, + struct list_head *q) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int cnt = 0; + + list_for_each_entry_safe(dma, tmp, q, list) { + list_del(&dma->list); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + + return cnt; +} + +static int rsxx_queue_discard(struct rsxx_cardinfo *card, + struct list_head *q, + unsigned int laddr, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->cmd = HW_CMD_BLK_DISCARD; + dma->laddr = laddr; + dma->dma_addr = 0; + dma->sub_page.off = 0; + dma->sub_page.cnt = 0; + dma->page = NULL; + dma->pg_off = 0; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr); + + list_add_tail(&dma->list, q); + + return 0; +} + +static int rsxx_queue_dma(struct rsxx_cardinfo *card, + struct list_head *q, + int dir, + unsigned int dma_off, + unsigned int dma_len, + unsigned int laddr, + struct page *page, + unsigned int pg_off, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len, + dir ? PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + if (!dma->dma_addr) { + kmem_cache_free(rsxx_dma_pool, dma); + return -ENOMEM; + } + + dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; + dma->laddr = laddr; + dma->sub_page.off = (dma_off >> 9); + dma->sub_page.cnt = (dma_len >> 9); + dma->page = page; + dma->pg_off = pg_off; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), + "Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n", + dir ? 'W' : 'R', dma->laddr, dma->sub_page.off, + dma->sub_page.cnt, dma->page, dma->pg_off); + + /* Queue the DMA */ + list_add_tail(&dma->list, q); + + return 0; +} + +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data) +{ + struct list_head dma_list[RSXX_MAX_TARGETS]; + struct bio_vec *bvec; + unsigned long long addr8; + unsigned int laddr; + unsigned int bv_len; + unsigned int bv_off; + unsigned int dma_off; + unsigned int dma_len; + int dma_cnt[RSXX_MAX_TARGETS]; + int tgt; + int st; + int i; + + addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */ + atomic_set(n_dmas, 0); + + for (i = 0; i < card->n_targets; i++) { + INIT_LIST_HEAD(&dma_list[i]); + dma_cnt[i] = 0; + } + + if (bio->bi_rw & REQ_DISCARD) { + bv_len = bio->bi_size; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + + st = rsxx_queue_discard(card, &dma_list[tgt], laddr, + cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += RSXX_HW_BLK_SIZE; + bv_len -= RSXX_HW_BLK_SIZE; + } + } else { + bio_for_each_segment(bvec, bio, i) { + bv_len = bvec->bv_len; + bv_off = bvec->bv_offset; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + dma_off = addr8 & RSXX_HW_BLK_MASK; + dma_len = min(bv_len, + RSXX_HW_BLK_SIZE - dma_off); + + st = rsxx_queue_dma(card, &dma_list[tgt], + bio_data_dir(bio), + dma_off, dma_len, + laddr, bvec->bv_page, + bv_off, cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += dma_len; + bv_off += dma_len; + bv_len -= dma_len; + } + } + } + + for (i = 0; i < card->n_targets; i++) { + if (!list_empty(&dma_list[i])) { + spin_lock(&card->ctrl[i].queue_lock); + card->ctrl[i].stats.sw_q_depth += dma_cnt[i]; + list_splice_tail(&dma_list[i], &card->ctrl[i].queue); + spin_unlock(&card->ctrl[i].queue_lock); + + queue_work(card->ctrl[i].issue_wq, + &card->ctrl[i].issue_dma_work); + } + } + + return 0; + +bvec_err: + for (i = 0; i < card->n_targets; i++) + rsxx_cleanup_dma_queue(card, &dma_list[i]); + + return st; +} + + +/*----------------- DMA Engine Initialization & Setup -------------------*/ +static int rsxx_dma_ctrl_init(struct pci_dev *dev, + struct rsxx_dma_ctrl *ctrl) +{ + int i; + + memset(&ctrl->stats, 0, sizeof(ctrl->stats)); + + ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr); + ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr); + if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) + return -ENOMEM; + + ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); + if (!ctrl->trackers) + return -ENOMEM; + + ctrl->trackers->head = 0; + for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { + ctrl->trackers->list[i].next_tag = i + 1; + ctrl->trackers->list[i].dma = NULL; + } + ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1; + spin_lock_init(&ctrl->trackers->lock); + + spin_lock_init(&ctrl->queue_lock); + INIT_LIST_HEAD(&ctrl->queue); + + setup_timer(&ctrl->activity_timer, dma_engine_stalled, + (unsigned long)ctrl); + + ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0); + if (!ctrl->issue_wq) + return -ENOMEM; + + ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0); + if (!ctrl->done_wq) + return -ENOMEM; + + INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); + INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); + + memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_LO); + iowrite32(upper_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_HI); + + memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); + iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + + ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); + if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading status cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); + iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + + ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); + if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + + wmb(); + + return 0; +} + +static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, + unsigned int stripe_size8) +{ + if (!is_power_of_2(stripe_size8)) { + dev_err(CARD_TO_DEV(card), + "stripe_size is NOT a power of 2!\n"); + return -EINVAL; + } + + card->_stripe.lower_mask = stripe_size8 - 1; + + card->_stripe.upper_mask = ~(card->_stripe.lower_mask); + card->_stripe.upper_shift = ffs(card->n_targets) - 1; + + card->_stripe.target_mask = card->n_targets - 1; + card->_stripe.target_shift = ffs(stripe_size8) - 1; + + dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask = x%016llx\n", + card->_stripe.lower_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift = x%016llx\n", + card->_stripe.upper_shift); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask = x%016llx\n", + card->_stripe.upper_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask = x%016llx\n", + card->_stripe.target_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n", + card->_stripe.target_shift); + + return 0; +} + +static int rsxx_dma_configure(struct rsxx_cardinfo *card) +{ + u32 intr_coal; + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + card->config.data.intr_coal.count, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); + + return rsxx_dma_stripe_setup(card, card->config.data.stripe_size); +} + +int rsxx_dma_setup(struct rsxx_cardinfo *card) +{ + unsigned long flags; + int st; + int i; + + dev_info(CARD_TO_DEV(card), + "Initializing %d DMA targets\n", + card->n_targets); + + /* Regmap is divided up into 4K chunks. One for each DMA channel */ + for (i = 0; i < card->n_targets; i++) + card->ctrl[i].regmap = card->regmap + (i * 4096); + + card->dma_fault = 0; + + /* Reset the DMA queues */ + rsxx_dma_queue_reset(card); + + /************* Setup DMA Control *************/ + for (i = 0; i < card->n_targets; i++) { + st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]); + if (st) + goto failed_dma_setup; + + card->ctrl[i].card = card; + card->ctrl[i].id = i; + } + + card->scrub_hard = 1; + + if (card->config_valid) + rsxx_dma_configure(card); + + /* Enable the interrupts after all setup has completed. */ + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + return 0; + +failed_dma_setup: + for (i = 0; i < card->n_targets; i++) { + struct rsxx_dma_ctrl *ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (ctrl->trackers) + vfree(ctrl->trackers); + + if (ctrl->status.buf) + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, + ctrl->status.dma_addr); + if (ctrl->cmd.buf) + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } + + return st; +} + + +void rsxx_dma_destroy(struct rsxx_cardinfo *card) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int i, j; + int cnt = 0; + + for (i = 0; i < card->n_targets; i++) { + ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (timer_pending(&ctrl->activity_timer)) + del_timer_sync(&ctrl->activity_timer); + + /* Clean up the DMA queue */ + spin_lock(&ctrl->queue_lock); + cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d queued DMAs on channel %d\n", + cnt, i); + + /* Clean up issued DMAs */ + for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { + dma = get_tracker_dma(ctrl->trackers, j); + if (dma) { + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d pending DMAs on channel %d\n", + cnt, i); + + vfree(ctrl->trackers); + + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, ctrl->status.dma_addr); + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } +} + + +int rsxx_dma_init(void) +{ + rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN); + if (!rsxx_dma_pool) + return -ENOMEM; + + return 0; +} + + +void rsxx_dma_cleanup(void) +{ + kmem_cache_destroy(rsxx_dma_pool); +} + diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h new file mode 100644 index 0000000..2e50b65 --- /dev/null +++ b/drivers/block/rsxx/rsxx.h @@ -0,0 +1,45 @@ +/* +* Filename: rsxx.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_H__ +#define __RSXX_H__ + +/*----------------- IOCTL Definitions -------------------*/ + +struct rsxx_reg_access { + __u32 addr; + __u32 cnt; + __u32 stat; + __u32 stream; + __u32 data[8]; +}; + +#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32))) + +#define RSXX_IOC_MAGIC 'r' + +#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access) +#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access) + +#endif /* __RSXX_H_ */ diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h new file mode 100644 index 0000000..c025fe5 --- /dev/null +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -0,0 +1,72 @@ +/* +* Filename: rsXX_cfg.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_CFG_H__ +#define __RSXX_CFG_H__ + +/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */ +#include <linux/types.h> + +/* + * The card config version must match the driver's expected version. If it does + * not, the DMA interfaces will not be attached and the user will need to + * initialize/upgrade the card configuration using the card config utility. + */ +#define RSXX_CFG_VERSION 4 + +struct card_cfg_hdr { + __u32 version; + __u32 crc; +}; + +struct card_cfg_data { + __u32 block_size; + __u32 stripe_size; + __u32 vendor_id; + __u32 cache_order; + struct { + __u32 mode; /* Disabled, manual, auto-tune... */ + __u32 count; /* Number of intr to coalesce */ + __u32 latency;/* Max wait time (in ns) */ + } intr_coal; +}; + +struct rsxx_card_cfg { + struct card_cfg_hdr hdr; + struct card_cfg_data data; +}; + +/* Vendor ID Values */ +#define RSXX_VENDOR_ID_TMS_IBM 0 +#define RSXX_VENDOR_ID_DSI 1 +#define RSXX_VENDOR_COUNT 2 + +/* Interrupt Coalescing Values */ +#define RSXX_INTR_COAL_DISABLED 0 +#define RSXX_INTR_COAL_EXPLICIT 1 +#define RSXX_INTR_COAL_AUTO_TUNE 2 + + +#endif /* __RSXX_CFG_H__ */ + diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h new file mode 100644 index 0000000..a1ac907 --- /dev/null +++ b/drivers/block/rsxx/rsxx_priv.h @@ -0,0 +1,399 @@ +/* +* Filename: rsxx_priv.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_PRIV_H__ +#define __RSXX_PRIV_H__ + +#include <linux/version.h> +#include <linux/semaphore.h> + +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <linux/workqueue.h> +#include <linux/bio.h> +#include <linux/vmalloc.h> +#include <linux/timer.h> +#include <linux/ioctl.h> + +#include "rsxx.h" +#include "rsxx_cfg.h" + +struct proc_cmd; + +#define PCI_VENDOR_ID_TMS_IBM 0x15B6 +#define PCI_DEVICE_ID_RS70_FLASH 0x0019 +#define PCI_DEVICE_ID_RS70D_FLASH 0x001A +#define PCI_DEVICE_ID_RS80_FLASH 0x001C +#define PCI_DEVICE_ID_RS81_FLASH 0x001E + +#define RS70_PCI_REV_SUPPORTED 4 + +#define DRIVER_NAME "rsxx" +#define DRIVER_VERSION "3.7" + +/* Block size is 4096 */ +#define RSXX_HW_BLK_SHIFT 12 +#define RSXX_HW_BLK_SIZE (1 << RSXX_HW_BLK_SHIFT) +#define RSXX_HW_BLK_MASK (RSXX_HW_BLK_SIZE - 1) + +#define MAX_CREG_DATA8 32 +#define LOG_BUF_SIZE8 128 + +#define RSXX_MAX_OUTSTANDING_CMDS 255 +#define RSXX_CS_IDX_MASK 0xff + +#define RSXX_MAX_TARGETS 8 + +struct dma_tracker_list; + +/* DMA Command/Status Buffer structure */ +struct rsxx_cs_buffer { + dma_addr_t dma_addr; + void *buf; + u32 idx; +}; + +struct rsxx_dma_stats { + u32 crc_errors; + u32 hard_errors; + u32 soft_errors; + u32 writes_issued; + u32 writes_failed; + u32 reads_issued; + u32 reads_failed; + u32 reads_retried; + u32 discards_issued; + u32 discards_failed; + u32 done_rescheduled; + u32 issue_rescheduled; + u32 sw_q_depth; /* Number of DMAs on the SW queue. */ + atomic_t hw_q_depth; /* Number of DMAs queued to HW. */ +}; + +struct rsxx_dma_ctrl { + struct rsxx_cardinfo *card; + int id; + void __iomem *regmap; + struct rsxx_cs_buffer status; + struct rsxx_cs_buffer cmd; + u16 e_cnt; + spinlock_t queue_lock; + struct list_head queue; + struct workqueue_struct *issue_wq; + struct work_struct issue_dma_work; + struct workqueue_struct *done_wq; + struct work_struct dma_done_work; + struct timer_list activity_timer; + struct dma_tracker_list *trackers; + struct rsxx_dma_stats stats; +}; + +struct rsxx_cardinfo { + struct pci_dev *dev; + unsigned int halt; + + void __iomem *regmap; + spinlock_t irq_lock; + unsigned int isr_mask; + unsigned int ier_mask; + + struct rsxx_card_cfg config; + int config_valid; + + /* Embedded CPU Communication */ + struct { + spinlock_t lock; + bool active; + struct creg_cmd *active_cmd; + struct work_struct done_work; + struct list_head queue; + unsigned int q_depth; + /* Cache the creg status to prevent ioreads */ + struct { + u32 stat; + u32 failed_cancel_timer; + u32 creg_timeout; + } creg_stats; + struct timer_list cmd_timer; + struct mutex reset_lock; + int reset; + } creg_ctrl; + + struct { + char tmp[MAX_CREG_DATA8]; + char buf[LOG_BUF_SIZE8]; /* terminated */ + int buf_len; + } log; + + struct work_struct event_work; + unsigned int state; + u64 size8; + + /* Lock the device attach/detach function */ + struct mutex dev_lock; + + /* Block Device Variables */ + bool bdev_attached; + int disk_id; + int major; + struct request_queue *queue; + struct gendisk *gendisk; + struct { + /* Used to convert a byte address to a device address. */ + u64 lower_mask; + u64 upper_shift; + u64 upper_mask; + u64 target_mask; + u64 target_shift; + } _stripe; + unsigned int dma_fault; + + int scrub_hard; + + int n_targets; + struct rsxx_dma_ctrl *ctrl; +}; + +enum rsxx_pci_regmap { + HWID = 0x00, /* Hardware Identification Register */ + SCRATCH = 0x04, /* Scratch/Debug Register */ + RESET = 0x08, /* Reset Register */ + ISR = 0x10, /* Interrupt Status Register */ + IER = 0x14, /* Interrupt Enable Register */ + IPR = 0x18, /* Interrupt Poll Register */ + CB_ADD_LO = 0x20, /* Command Host Buffer Address [31:0] */ + CB_ADD_HI = 0x24, /* Command Host Buffer Address [63:32]*/ + HW_CMD_IDX = 0x28, /* Hardware Processed Command Index */ + SW_CMD_IDX = 0x2C, /* Software Processed Command Index */ + SB_ADD_LO = 0x30, /* Status Host Buffer Address [31:0] */ + SB_ADD_HI = 0x34, /* Status Host Buffer Address [63:32] */ + HW_STATUS_CNT = 0x38, /* Hardware Status Counter */ + SW_STATUS_CNT = 0x3C, /* Deprecated */ + CREG_CMD = 0x40, /* CPU Command Register */ + CREG_ADD = 0x44, /* CPU Address Register */ + CREG_CNT = 0x48, /* CPU Count Register */ + CREG_STAT = 0x4C, /* CPU Status Register */ + CREG_DATA0 = 0x50, /* CPU Data Registers */ + CREG_DATA1 = 0x54, + CREG_DATA2 = 0x58, + CREG_DATA3 = 0x5C, + CREG_DATA4 = 0x60, + CREG_DATA5 = 0x64, + CREG_DATA6 = 0x68, + CREG_DATA7 = 0x6c, + INTR_COAL = 0x70, /* Interrupt Coalescing Register */ + HW_ERROR = 0x74, /* Card Error Register */ + PCI_DEBUG0 = 0x78, /* PCI Debug Registers */ + PCI_DEBUG1 = 0x7C, + PCI_DEBUG2 = 0x80, + PCI_DEBUG3 = 0x84, + PCI_DEBUG4 = 0x88, + PCI_DEBUG5 = 0x8C, + PCI_DEBUG6 = 0x90, + PCI_DEBUG7 = 0x94, + PCI_POWER_THROTTLE = 0x98, + PERF_CTRL = 0x9c, + PERF_TIMER_LO = 0xa0, + PERF_TIMER_HI = 0xa4, + PERF_RD512_LO = 0xa8, + PERF_RD512_HI = 0xac, + PERF_WR512_LO = 0xb0, + PERF_WR512_HI = 0xb4, +}; + +enum rsxx_intr { + CR_INTR_DMA0 = 0x00000001, + CR_INTR_CREG = 0x00000002, + CR_INTR_DMA1 = 0x00000004, + CR_INTR_EVENT = 0x00000008, + CR_INTR_DMA2 = 0x00000010, + CR_INTR_DMA3 = 0x00000020, + CR_INTR_DMA4 = 0x00000040, + CR_INTR_DMA5 = 0x00000080, + CR_INTR_DMA6 = 0x00000100, + CR_INTR_DMA7 = 0x00000200, + CR_INTR_DMA_ALL = 0x000003f5, + CR_INTR_ALL = 0xffffffff, +}; + +static inline int CR_INTR_DMA(int N) +{ + static const unsigned int _CR_INTR_DMA[] = { + CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3, + CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7 + }; + return _CR_INTR_DMA[N]; +} +enum rsxx_pci_reset { + DMA_QUEUE_RESET = 0x00000001, +}; + +enum rsxx_pci_revision { + RSXX_DISCARD_SUPPORT = 2, +}; + +enum rsxx_creg_cmd { + CREG_CMD_TAG_MASK = 0x0000FF00, + CREG_OP_WRITE = 0x000000C0, + CREG_OP_READ = 0x000000E0, +}; + +enum rsxx_creg_addr { + CREG_ADD_CARD_CMD = 0x80001000, + CREG_ADD_CARD_STATE = 0x80001004, + CREG_ADD_CARD_SIZE = 0x8000100c, + CREG_ADD_CAPABILITIES = 0x80001050, + CREG_ADD_LOG = 0x80002000, + CREG_ADD_NUM_TARGETS = 0x80003000, + CREG_ADD_CONFIG = 0xB0000000, +}; + +enum rsxx_creg_card_cmd { + CARD_CMD_STARTUP = 1, + CARD_CMD_SHUTDOWN = 2, + CARD_CMD_LOW_LEVEL_FORMAT = 3, + CARD_CMD_FPGA_RECONFIG_BR = 4, + CARD_CMD_FPGA_RECONFIG_MAIN = 5, + CARD_CMD_BACKUP = 6, + CARD_CMD_RESET = 7, + CARD_CMD_deprecated = 8, + CARD_CMD_UNINITIALIZE = 9, + CARD_CMD_DSTROY_EMERGENCY = 10, + CARD_CMD_DSTROY_NORMAL = 11, + CARD_CMD_DSTROY_EXTENDED = 12, + CARD_CMD_DSTROY_ABORT = 13, +}; + +enum rsxx_card_state { + CARD_STATE_SHUTDOWN = 0x00000001, + CARD_STATE_STARTING = 0x00000002, + CARD_STATE_FORMATTING = 0x00000004, + CARD_STATE_UNINITIALIZED = 0x00000008, + CARD_STATE_GOOD = 0x00000010, + CARD_STATE_SHUTTING_DOWN = 0x00000020, + CARD_STATE_FAULT = 0x00000040, + CARD_STATE_RD_ONLY_FAULT = 0x00000080, + CARD_STATE_DSTROYING = 0x00000100, +}; + +enum rsxx_led { + LED_DEFAULT = 0x0, + LED_IDENTIFY = 0x1, + LED_SOAK = 0x2, +}; + +enum rsxx_creg_flash_lock { + CREG_FLASH_LOCK = 1, + CREG_FLASH_UNLOCK = 2, +}; + +enum rsxx_card_capabilities { + CARD_CAP_SUBPAGE_WRITES = 0x00000080, +}; + +enum rsxx_creg_stat { + CREG_STAT_STATUS_MASK = 0x00000003, + CREG_STAT_SUCCESS = 0x1, + CREG_STAT_ERROR = 0x2, + CREG_STAT_CHAR_PENDING = 0x00000004, /* Character I/O pending bit */ + CREG_STAT_LOG_PENDING = 0x00000008, /* HW log message pending bit */ + CREG_STAT_TAG_MASK = 0x0000ff00, +}; + +static inline unsigned int CREG_DATA(int N) +{ + return CREG_DATA0 + (N << 2); +} + +/*----------------- Convenient Log Wrappers -------------------*/ +#define CARD_TO_DEV(__CARD) (&(__CARD)->dev->dev) + +/***** config.c *****/ +int rsxx_load_config(struct rsxx_cardinfo *card); + +/***** core.c *****/ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); + +/***** dev.c *****/ +int rsxx_attach_dev(struct rsxx_cardinfo *card); +void rsxx_detach_dev(struct rsxx_cardinfo *card); +int rsxx_setup_dev(struct rsxx_cardinfo *card); +void rsxx_destroy_dev(struct rsxx_cardinfo *card); +int rsxx_dev_init(void); +void rsxx_dev_cleanup(void); + +/***** dma.c ****/ +typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int status); +int rsxx_dma_setup(struct rsxx_cardinfo *card); +void rsxx_dma_destroy(struct rsxx_cardinfo *card); +int rsxx_dma_init(void); +void rsxx_dma_cleanup(void); +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data); + +/***** cregs.c *****/ +int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_read_hw_log(struct rsxx_cardinfo *card); +int rsxx_get_card_state(struct rsxx_cardinfo *card, + unsigned int *state); +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8); +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets); +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities); +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd); +int rsxx_creg_setup(struct rsxx_cardinfo *card); +void rsxx_creg_destroy(struct rsxx_cardinfo *card); +int rsxx_creg_init(void); +void rsxx_creg_cleanup(void); + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read); + + + +#endif /* __DRIVERS_BLOCK_RSXX_H__ */ diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 57763c5..758f2ac 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1090,10 +1090,13 @@ static const struct block_device_operations floppy_fops = { static void swim3_mb_event(struct macio_dev* mdev, int mb_state) { struct floppy_state *fs = macio_get_drvdata(mdev); - struct swim3 __iomem *sw = fs->swim3; + struct swim3 __iomem *sw; if (!fs) return; + + sw = fs->swim3; + if (mb_state != MB_FD) return; diff --git a/drivers/block/xd.c b/drivers/block/xd.c deleted file mode 100644 index ff54052..0000000 --- a/drivers/block/xd.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* - * This file contains the driver for an XT hard disk controller - * (at least the DTC 5150X) for Linux. - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, - * kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and - * Wim Van Dorst. - * - * Revised: 04/04/94 by Risto Kankkunen - * Moved the detection code from xd_init() to xd_geninit() as it needed - * interrupts enabled and Linus didn't want to enable them in that first - * phase. xd_geninit() is the place to do these kinds of things anyway, - * he says. - * - * Modularized: 04/10/96 by Todd Fries, tfries@umr.edu - * - * Revised: 13/12/97 by Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl - * Fixed some problems with disk initialization and module initiation. - * Added support for manual geometry setting (except Seagate controllers) - * in form: - * xd_geo=<cyl_xda>,<head_xda>,<sec_xda>[,<cyl_xdb>,<head_xdb>,<sec_xdb>] - * Recovered DMA access. Abridged messages. Added support for DTC5051CX, - * WD1002-27X & XEBEC controllers. Driver uses now some jumper settings. - * Extended ioctl() support. - * - * Bugfix: 15/02/01, Paul G. - inform queue layer of tiny xd_maxsect. - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/wait.h> -#include <linux/blkdev.h> -#include <linux/mutex.h> -#include <linux/blkpg.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/gfp.h> - -#include <asm/uaccess.h> -#include <asm/dma.h> - -#include "xd.h" - -static DEFINE_MUTEX(xd_mutex); -static void __init do_xd_setup (int *integers); -#ifdef MODULE -static int xd[5] = { -1,-1,-1,-1, }; -#endif - -#define XD_DONT_USE_DMA 0 /* Initial value. may be overriden using - "nodma" module option */ -#define XD_INIT_DISK_DELAY (30) /* 30 ms delay during disk initialization */ - -/* Above may need to be increased if a problem with the 2nd drive detection - (ST11M controller) or resetting a controller (WD) appears */ - -static XD_INFO xd_info[XD_MAXDRIVES]; - -/* If you try this driver and find that your card is not detected by the driver at bootup, you need to add your BIOS - signature and details to the following list of signatures. A BIOS signature is a string embedded into the first - few bytes of your controller's on-board ROM BIOS. To find out what yours is, use something like MS-DOS's DEBUG - command. Run DEBUG, and then you can examine your BIOS signature with: - - d xxxx:0000 - - where xxxx is the segment of your controller (like C800 or D000 or something). On the ASCII dump at the right, you should - be able to see a string mentioning the manufacturer's copyright etc. Add this string into the table below. The parameters - in the table are, in order: - - offset ; this is the offset (in bytes) from the start of your ROM where the signature starts - signature ; this is the actual text of the signature - xd_?_init_controller ; this is the controller init routine used by your controller - xd_?_init_drive ; this is the drive init routine used by your controller - - The controllers directly supported at the moment are: DTC 5150x, WD 1004A27X, ST11M/R and override. If your controller is - made by the same manufacturer as one of these, try using the same init routines as they do. If that doesn't work, your - best bet is to use the "override" routines. These routines use a "portable" method of getting the disk's geometry, and - may work with your card. If none of these seem to work, try sending me some email and I'll see what I can do <grin>. - - NOTE: You can now specify your XT controller's parameters from the command line in the form xd=TYPE,IRQ,IO,DMA. The driver - should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */ - -#include <asm/page.h> -#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size)) -#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size)) -static char *xd_dma_buffer; - -static XD_SIGNATURE xd_sigs[] __initdata = { - { 0x0000,"Override geometry handler",NULL,xd_override_init_drive,"n unknown" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"[BXD06 (C) DTC 17-MAY-1985]",xd_dtc_init_controller,xd_dtc5150cx_init_drive," DTC 5150CX" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x000B,"CRD18A Not an IBM rom. (C) Copyright Data Technology Corp. 05/31/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Todd Fries, tfries@umr.edu */ - { 0x000B,"CXD23A Not an IBM ROM (C)Copyright Data Technology Corp 12/03/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"07/15/86(C) Copyright 1986 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. 1002-27X" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"06/24/88(C) Copyright 1988 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. WDXT-GEN2" }, /* Dan Newcombe, newcombe@aa.csc.peachnet.edu */ - { 0x0015,"SEAGATE ST11 BIOS REVISION",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Salvador Abreu, spa@fct.unl.pt */ - { 0x0010,"ST11R BIOS",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Risto Kankkunen, risto.kankkunen@cs.helsinki.fi */ - { 0x0010,"ST11 BIOS v1.7",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11R" }, /* Alan Hourihane, alanh@fairlite.demon.co.uk */ - { 0x1000,"(c)Copyright 1987 SMS",xd_omti_init_controller,xd_omti_init_drive,"n OMTI 5520" }, /* Dirk Melchers, dirk@merlin.nbg.sub.org */ - { 0x0006,"COPYRIGHT XEBEC (C) 1984",xd_xebec_init_controller,xd_xebec_init_drive," XEBEC" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"(C) Copyright 1984 Western Digital Corp", xd_wd_init_controller, xd_wd_init_drive," Western Dig. 1002s-wx2" }, - { 0x0008,"(C) Copyright 1986 Western Digital Corporation", xd_wd_init_controller, xd_wd_init_drive," 1986 Western Digital" }, /* jfree@sovereign.org */ -}; - -static unsigned int xd_bases[] __initdata = -{ - 0xC8000, 0xCA000, 0xCC000, - 0xCE000, 0xD0000, 0xD2000, - 0xD4000, 0xD6000, 0xD8000, - 0xDA000, 0xDC000, 0xDE000, - 0xE0000 -}; - -static DEFINE_SPINLOCK(xd_lock); - -static struct gendisk *xd_gendisk[2]; - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); - -static const struct block_device_operations xd_fops = { - .owner = THIS_MODULE, - .ioctl = xd_ioctl, - .getgeo = xd_getgeo, -}; -static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); -static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors; -static u_char xd_override __initdata = 0, xd_type __initdata = 0; -static u_short xd_iobase = 0x320; -static int xd_geo[XD_MAXDRIVES*3] __initdata = { 0, }; - -static volatile int xdc_busy; -static struct timer_list xd_watchdog_int; - -static volatile u_char xd_error; -static bool nodma = XD_DONT_USE_DMA; - -static struct request_queue *xd_queue; - -/* xd_init: register the block device number and set up pointer tables */ -static int __init xd_init(void) -{ - u_char i,controller; - unsigned int address; - int err; - -#ifdef MODULE - { - u_char count = 0; - for (i = 4; i > 0; i--) - if (((xd[i] = xd[i-1]) >= 0) && !count) - count = i; - if ((xd[0] = count)) - do_xd_setup(xd); - } -#endif - - init_timer (&xd_watchdog_int); xd_watchdog_int.function = xd_watchdog; - - err = -EBUSY; - if (register_blkdev(XT_DISK_MAJOR, "xd")) - goto out1; - - err = -ENOMEM; - xd_queue = blk_init_queue(do_xd_request, &xd_lock); - if (!xd_queue) - goto out1a; - - if (xd_detect(&controller,&address)) { - - printk("Detected a%s controller (type %d) at address %06x\n", - xd_sigs[controller].name,controller,address); - if (!request_region(xd_iobase,4,"xd")) { - printk("xd: Ports at 0x%x are not available\n", - xd_iobase); - goto out2; - } - if (controller) - xd_sigs[controller].init_controller(address); - xd_drives = xd_initdrives(xd_sigs[controller].init_drive); - - printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n", - xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma); - } - - /* - * With the drive detected, xd_maxsectors should now be known. - * If xd_maxsectors is 0, nothing was detected and we fall through - * to return -ENODEV - */ - if (!xd_dma_buffer && xd_maxsectors) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - printk(KERN_ERR "xd: Out of memory.\n"); - goto out3; - } - } - - err = -ENODEV; - if (!xd_drives) - goto out3; - - for (i = 0; i < xd_drives; i++) { - XD_INFO *p = &xd_info[i]; - struct gendisk *disk = alloc_disk(64); - if (!disk) - goto Enomem; - p->unit = i; - disk->major = XT_DISK_MAJOR; - disk->first_minor = i<<6; - sprintf(disk->disk_name, "xd%c", i+'a'); - disk->fops = &xd_fops; - disk->private_data = p; - disk->queue = xd_queue; - set_capacity(disk, p->heads * p->cylinders * p->sectors); - printk(" %s: CHS=%d/%d/%d\n", disk->disk_name, - p->cylinders, p->heads, p->sectors); - xd_gendisk[i] = disk; - } - - err = -EBUSY; - if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) { - printk("xd: unable to get IRQ%d\n",xd_irq); - goto out4; - } - - if (request_dma(xd_dma,"xd")) { - printk("xd: unable to get DMA%d\n",xd_dma); - goto out5; - } - - /* xd_maxsectors depends on controller - so set after detection */ - blk_queue_max_hw_sectors(xd_queue, xd_maxsectors); - - for (i = 0; i < xd_drives; i++) - add_disk(xd_gendisk[i]); - - return 0; - -out5: - free_irq(xd_irq, NULL); -out4: - for (i = 0; i < xd_drives; i++) - put_disk(xd_gendisk[i]); -out3: - if (xd_maxsectors) - release_region(xd_iobase,4); - - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); -out2: - blk_cleanup_queue(xd_queue); -out1a: - unregister_blkdev(XT_DISK_MAJOR, "xd"); -out1: - return err; -Enomem: - err = -ENOMEM; - while (i--) - put_disk(xd_gendisk[i]); - goto out3; -} - -/* xd_detect: scan the possible BIOS ROM locations for the signature strings */ -static u_char __init xd_detect (u_char *controller, unsigned int *address) -{ - int i, j; - - if (xd_override) - { - *controller = xd_type; - *address = 0; - return(1); - } - - for (i = 0; i < ARRAY_SIZE(xd_bases); i++) { - void __iomem *p = ioremap(xd_bases[i], 0x2000); - if (!p) - continue; - for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) { - const char *s = xd_sigs[j].string; - if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) { - *controller = j; - xd_type = j; - *address = xd_bases[i]; - iounmap(p); - return 1; - } - } - iounmap(p); - } - return 0; -} - -/* do_xd_request: handle an incoming request */ -static void do_xd_request (struct request_queue * q) -{ - struct request *req; - - if (xdc_busy) - return; - - req = blk_fetch_request(q); - while (req) { - unsigned block = blk_rq_pos(req); - unsigned count = blk_rq_cur_sectors(req); - XD_INFO *disk = req->rq_disk->private_data; - int res = -EIO; - int retry; - - if (req->cmd_type != REQ_TYPE_FS) - goto done; - if (block + count > get_capacity(req->rq_disk)) - goto done; - for (retry = 0; (retry < XD_RETRIES) && !res; retry++) - res = xd_readwrite(rq_data_dir(req), disk, req->buffer, - block, count); - done: - /* wrap up, 0 = success, -errno = fail */ - if (!__blk_end_request_cur(req, res)) - req = blk_fetch_request(q); - } -} - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - XD_INFO *p = bdev->bd_disk->private_data; - - geo->heads = p->heads; - geo->sectors = p->sectors; - geo->cylinders = p->cylinders; - return 0; -} - -/* xd_ioctl: handle device ioctl's */ -static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg) -{ - switch (cmd) { - case HDIO_SET_DMA: - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (xdc_busy) return -EBUSY; - nodma = !arg; - if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); - xd_dma_buffer = NULL; - } else if (!nodma && !xd_dma_buffer) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - nodma = XD_DONT_USE_DMA; - return -ENOMEM; - } - } - return 0; - case HDIO_GET_DMA: - return put_user(!nodma, (long __user *) arg); - case HDIO_GET_MULTCOUNT: - return put_user(xd_maxsectors, (long __user *) arg); - default: - return -EINVAL; - } -} - -static int xd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long param) -{ - int ret; - - mutex_lock(&xd_mutex); - ret = xd_locked_ioctl(bdev, mode, cmd, param); - mutex_unlock(&xd_mutex); - - return ret; -} - -/* xd_readwrite: handle a read/write request */ -static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count) -{ - int drive = p->unit; - u_char cmdblk[6],sense[4]; - u_short track,cylinder; - u_char head,sector,control,mode = PIO_MODE,temp; - char **real_buffer; - register int i; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: operation = %s, drive = %d, buffer = 0x%X, block = %d, count = %d\n",operation == READ ? "read" : "write",drive,buffer,block,count); -#endif /* DEBUG_READWRITE */ - - spin_unlock_irq(&xd_lock); - - control = p->control; - if (!xd_dma_buffer) - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - while (count) { - temp = count < xd_maxsectors ? count : xd_maxsectors; - - track = block / p->sectors; - head = track % p->heads; - cylinder = track / p->heads; - sector = block % p->sectors; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: drive = %d, head = %d, cylinder = %d, sector = %d, count = %d\n",drive,head,cylinder,sector,temp); -#endif /* DEBUG_READWRITE */ - - if (xd_dma_buffer) { - mode = xd_setup_dma(operation == READ ? DMA_MODE_READ : DMA_MODE_WRITE,(u_char *)(xd_dma_buffer),temp * 0x200); - real_buffer = &xd_dma_buffer; - for (i=0; i < (temp * 0x200); i++) - xd_dma_buffer[i] = buffer[i]; - } - else - real_buffer = &buffer; - - xd_build(cmdblk,operation == READ ? CMD_READ : CMD_WRITE,drive,head,cylinder,sector,temp & 0xFF,control); - - switch (xd_command(cmdblk,mode,(u_char *)(*real_buffer),(u_char *)(*real_buffer),sense,XD_TIMEOUT)) { - case 1: - printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write")); - xd_recalibrate(drive); - spin_lock_irq(&xd_lock); - return -EIO; - case 2: - if (sense[0] & 0x30) { - printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing")); - switch ((sense[0] & 0x30) >> 4) { - case 0: printk("drive error, code = 0x%X",sense[0] & 0x0F); - break; - case 1: printk("controller error, code = 0x%X",sense[0] & 0x0F); - break; - case 2: printk("command error, code = 0x%X",sense[0] & 0x0F); - break; - case 3: printk("miscellaneous error, code = 0x%X",sense[0] & 0x0F); - break; - } - } - if (sense[0] & 0x80) - printk(" - CHS = %d/%d/%d\n",((sense[2] & 0xC0) << 2) | sense[3],sense[1] & 0x1F,sense[2] & 0x3F); - /* reported drive number = (sense[1] & 0xE0) >> 5 */ - else - printk(" - no valid disk address\n"); - spin_lock_irq(&xd_lock); - return -EIO; - } - if (xd_dma_buffer) - for (i=0; i < (temp * 0x200); i++) - buffer[i] = xd_dma_buffer[i]; - - count -= temp, buffer += temp * 0x200, block += temp; - } - spin_lock_irq(&xd_lock); - return 0; -} - -/* xd_recalibrate: recalibrate a given drive and reset controller if necessary */ -static void xd_recalibrate (u_char drive) -{ - u_char cmdblk[6]; - - xd_build(cmdblk,CMD_RECALIBRATE,drive,0,0,0,0,0); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 8)) - printk("xd%c: warning! error recalibrating, controller may be unstable\n", 'a'+drive); -} - -/* xd_interrupt_handler: interrupt service routine */ -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id) -{ - if (inb(XD_STATUS) & STAT_INTERRUPT) { /* check if it was our device */ -#ifdef DEBUG_OTHER - printk("xd_interrupt_handler: interrupt detected\n"); -#endif /* DEBUG_OTHER */ - outb(0,XD_CONTROL); /* acknowledge interrupt */ - wake_up(&xd_wait_int); /* and wake up sleeping processes */ - return IRQ_HANDLED; - } - else - printk("xd: unexpected interrupt\n"); - return IRQ_NONE; -} - -/* xd_setup_dma: set up the DMA controller for a data transfer */ -static u_char xd_setup_dma (u_char mode,u_char *buffer,u_int count) -{ - unsigned long f; - - if (nodma) - return (PIO_MODE); - if (((unsigned long) buffer & 0xFFFF0000) != (((unsigned long) buffer + count) & 0xFFFF0000)) { -#ifdef DEBUG_OTHER - printk("xd_setup_dma: using PIO, transfer overlaps 64k boundary\n"); -#endif /* DEBUG_OTHER */ - return (PIO_MODE); - } - - f=claim_dma_lock(); - disable_dma(xd_dma); - clear_dma_ff(xd_dma); - set_dma_mode(xd_dma,mode); - set_dma_addr(xd_dma, (unsigned long) buffer); - set_dma_count(xd_dma,count); - - release_dma_lock(f); - - return (DMA_MODE); /* use DMA and INT */ -} - -/* xd_build: put stuff into an array in a format suitable for the controller */ -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control) -{ - cmdblk[0] = command; - cmdblk[1] = ((drive & 0x07) << 5) | (head & 0x1F); - cmdblk[2] = ((cylinder & 0x300) >> 2) | (sector & 0x3F); - cmdblk[3] = cylinder & 0xFF; - cmdblk[4] = count; - cmdblk[5] = control; - - return (cmdblk); -} - -static void xd_watchdog (unsigned long unused) -{ - xd_error = 1; - wake_up(&xd_wait_int); -} - -/* xd_waitport: waits until port & mask == flags or a timeout occurs. return 1 for a timeout */ -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout) -{ - u_long expiry = jiffies + timeout; - int success; - - xdc_busy = 1; - while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry)) - schedule_timeout_uninterruptible(1); - xdc_busy = 0; - return (success); -} - -static inline u_int xd_wait_for_IRQ (void) -{ - unsigned long flags; - xd_watchdog_int.expires = jiffies + 8 * HZ; - add_timer(&xd_watchdog_int); - - flags=claim_dma_lock(); - enable_dma(xd_dma); - release_dma_lock(flags); - - sleep_on(&xd_wait_int); - del_timer(&xd_watchdog_int); - xdc_busy = 0; - - flags=claim_dma_lock(); - disable_dma(xd_dma); - release_dma_lock(flags); - - if (xd_error) { - printk("xd: missed IRQ - command aborted\n"); - xd_error = 0; - return (1); - } - return (0); -} - -/* xd_command: handle all data transfers necessary for a single command */ -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout) -{ - u_char cmdblk[6],csb,complete = 0; - -#ifdef DEBUG_COMMAND - printk("xd_command: command = 0x%X, mode = 0x%X, indata = 0x%X, outdata = 0x%X, sense = 0x%X\n",command,mode,indata,outdata,sense); -#endif /* DEBUG_COMMAND */ - - outb(0,XD_SELECT); - outb(mode,XD_CONTROL); - - if (xd_waitport(XD_STATUS,STAT_SELECT,STAT_SELECT,timeout)) - return (1); - - while (!complete) { - if (xd_waitport(XD_STATUS,STAT_READY,STAT_READY,timeout)) - return (1); - - switch (inb(XD_STATUS) & (STAT_COMMAND | STAT_INPUT)) { - case 0: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - outb(outdata ? *outdata++ : 0,XD_DATA); - break; - case STAT_INPUT: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - if (indata) - *indata++ = inb(XD_DATA); - else - inb(XD_DATA); - break; - case STAT_COMMAND: - outb(command ? *command++ : 0,XD_DATA); - break; - case STAT_COMMAND | STAT_INPUT: - complete = 1; - break; - } - } - csb = inb(XD_DATA); - - if (xd_waitport(XD_STATUS,0,STAT_SELECT,timeout)) /* wait until deselected */ - return (1); - - if (csb & CSB_ERROR) { /* read sense data if error */ - xd_build(cmdblk,CMD_SENSE,(csb & CSB_LUN) >> 5,0,0,0,0,0); - if (xd_command(cmdblk,0,sense,NULL,NULL,XD_TIMEOUT)) - printk("xd: warning! sense command failed!\n"); - } - -#ifdef DEBUG_COMMAND - printk("xd_command: completed with csb = 0x%X\n",csb); -#endif /* DEBUG_COMMAND */ - - return (csb & CSB_ERROR); -} - -static u_char __init xd_initdrives (void (*init_drive)(u_char drive)) -{ - u_char cmdblk[6],i,count = 0; - - for (i = 0; i < XD_MAXDRIVES; i++) { - xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) { - msleep_interruptible(XD_INIT_DISK_DELAY); - - init_drive(count); - count++; - - msleep_interruptible(XD_INIT_DISK_DELAY); - } - } - return (count); -} - -static void __init xd_manual_geo_set (u_char drive) -{ - xd_info[drive].heads = (u_char)(xd_geo[3 * drive + 1]); - xd_info[drive].cylinders = (u_short)(xd_geo[3 * drive]); - xd_info[drive].sectors = (u_char)(xd_geo[3 * drive + 2]); -} - -static void __init xd_dtc_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; - case 0xD0000: /*5150CX*/ - case 0xD8000: break; /*5150CX & 5150XL*/ - default: printk("xd_dtc_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* my card seems to have trouble doing multi-block transfers? */ - - outb(0,XD_RESET); /* reset the controller */ -} - - -static void __init xd_dtc5150cx_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][4] = { - {0x200,8,0x200,0x100}, - {0x267,2,0x267,0x267}, - {0x264,4,0x264,0x80}, - {0x132,4,0x132,0x0}, - {0x132,2,0x80, 0x132}, - {0x177,8,0x177,0x0}, - {0x132,8,0x84, 0x0}, - {}, /* not used */ - {0x132,6,0x80, 0x100}, - {0x200,6,0x100,0x100}, - {0x264,2,0x264,0x80}, - {0x280,4,0x280,0x100}, - {0x2B9,3,0x2B9,0x2B9}, - {0x2B9,5,0x2B9,0x2B9}, - {0x280,6,0x280,0x100}, - {0x132,4,0x132,0x0}}; - u_char n; - - n = inb(XD_JUMPER); - n = (drive ? n : (n >> 2)) & 0x33; - n = (n | (n >> 2)) & 0x0F; - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else - if (n != 7) { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - else { - printk("xd%c: undetermined drive geometry\n",'a'+drive); - return; - } - xd_info[drive].control = 5; /* control byte */ - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -static void __init xd_dtc_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[64]; - - xd_build(cmdblk,CMD_DTCGETGEOM,drive,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x0A]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf))[0x04]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf + 1))[0x05]; /* reduced write */ - xd_info[drive].precomp = ((u_short *) (buf + 1))[0x06]; /* write precomp */ - xd_info[drive].ecc = buf[0x0F]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = 0; /* control byte */ - - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,((u_short *) (buf + 1))[0x05],((u_short *) (buf + 1))[0x06],buf[0x0F]); - xd_build(cmdblk,CMD_DTCSETSTEP,drive,0,0,0,0,7); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - printk("xd_dtc_init_drive: error setting step rate for xd%c\n", 'a'+drive); - } - else - printk("xd_dtc_init_drive: error reading geometry for xd%c\n", 'a'+drive); -} - -static void __init xd_wd_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; break; - case 0xCC000: xd_iobase = 0x328; break; - case 0xCE000: xd_iobase = 0x32C; break; - case 0xD0000: xd_iobase = 0x328; break; /* ? */ - case 0xD8000: xd_iobase = 0x32C; break; /* ? */ - default: printk("xd_wd_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* this one doesn't wrap properly either... */ - - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_wd_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS may be disabled */ - static u_short geometry_table[][4] = { - {0x264,4,0x1C2,0x1C2}, /* common part */ - {0x132,4,0x099,0x0}, - {0x267,2,0x1C2,0x1C2}, - {0x267,4,0x1C2,0x1C2}, - - {0x334,6,0x335,0x335}, /* 1004 series RLL */ - {0x30E,4,0x30F,0x3DC}, - {0x30E,2,0x30F,0x30F}, - {0x267,4,0x268,0x268}, - - {0x3D5,5,0x3D6,0x3D6}, /* 1002 series RLL */ - {0x3DB,7,0x3DC,0x3DC}, - {0x264,4,0x265,0x265}, - {0x267,4,0x268,0x268}}; - - u_char cmdblk[6],buf[0x200]; - u_char n = 0,rll,jumper_state,use_jumper_geo; - u_char wd_1002 = (xd_sigs[xd_type].string[7] == '6'); - - jumper_state = ~(inb(0x322)); - if (jumper_state & 0x40) - xd_irq = 9; - rll = (jumper_state & 0x30) ? (0x04 << wd_1002) : 0; - xd_build(cmdblk,CMD_READ,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x1AF]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf + 1))[0xD6]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf))[0xD8]; /* reduced write */ - xd_info[drive].wprecomp = ((u_short *) (buf))[0xDA]; /* write precomp */ - xd_info[drive].ecc = buf[0x1B4]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = buf[0x1B5]; /* control byte */ - use_jumper_geo = !(xd_info[drive].heads) || !(xd_info[drive].cylinders); - if (xd_geo[3*drive]) { - xd_manual_geo_set(drive); - xd_info[drive].control = rll ? 7 : 5; - } - else if (use_jumper_geo) { - n = (((jumper_state & 0x0F) >> (drive << 1)) & 0x03) | rll; - xd_info[drive].cylinders = geometry_table[n][0]; - xd_info[drive].heads = (u_char)(geometry_table[n][1]); - xd_info[drive].control = rll ? 7 : 5; -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; - xd_info[drive].wprecomp = geometry_table[n][3]; - xd_info[drive].ecc = 0x0B; -#endif /* 0 */ - } - if (!wd_1002) { - if (use_jumper_geo) - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - geometry_table[n][2],geometry_table[n][3],0x0B); - else - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - ((u_short *) (buf))[0xD8],((u_short *) (buf))[0xDA],buf[0x1B4]); - } - /* 1002 based RLL controller requests converted addressing, but reports physical - (physical 26 sec., logical 17 sec.) - 1004 based ???? */ - if (rll & wd_1002) { - if ((xd_info[drive].cylinders *= 26, - xd_info[drive].cylinders /= 17) > 1023) - xd_info[drive].cylinders = 1023; /* 1024 ? */ -#if 0 - xd_info[drive].rwrite *= 26; - xd_info[drive].rwrite /= 17; - xd_info[drive].wprecomp *= 26 - xd_info[drive].wprecomp /= 17; -#endif /* 0 */ - } - } - else - printk("xd_wd_init_drive: error reading geometry for xd%c\n",'a'+drive); - -} - -static void __init xd_seagate_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_seagate_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_seagate_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[0x200]; - - xd_build(cmdblk,CMD_ST11GETGEOM,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x04]; /* heads */ - xd_info[drive].cylinders = (buf[0x02] << 8) | buf[0x03]; /* cylinders */ - xd_info[drive].sectors = buf[0x05]; /* sectors */ - xd_info[drive].control = 0; /* control byte */ - } - else - printk("xd_seagate_init_drive: error reading geometry from xd%c\n", 'a'+drive); -} - -/* Omti support courtesy Dirk Melchers */ -static void __init xd_omti_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_omti_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_omti_init_drive (u_char drive) -{ - /* gets infos from drive */ - xd_override_init_drive(drive); - - /* set other parameters, Hardcoded, not that nice :-) */ - xd_info[drive].control = 2; -} - -/* Xebec support (AK) */ -static void __init xd_xebec_init_controller (unsigned int address) -{ -/* iobase may be set manually in range 0x300 - 0x33C - irq may be set manually to 2(9),3,4,5,6,7 - dma may be set manually to 1,2,3 - (How to detect them ???) -BIOS address may be set manually in range 0x0 - 0xF8000 -If you need non-standard settings use the xd=... command */ - - switch (address) { - case 0x00000: - case 0xC8000: /* initially: xd_iobase==0x320 */ - case 0xD0000: - case 0xD2000: - case 0xD4000: - case 0xD6000: - case 0xD8000: - case 0xDA000: - case 0xDC000: - case 0xDE000: - case 0xE0000: break; - default: printk("xd_xebec_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x01; - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_xebec_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][5] = { - {0x132,4,0x080,0x080,0x7}, - {0x132,4,0x080,0x080,0x17}, - {0x264,2,0x100,0x100,0x7}, - {0x264,2,0x100,0x100,0x17}, - {0x132,8,0x080,0x080,0x7}, - {0x132,8,0x080,0x080,0x17}, - {0x264,4,0x100,0x100,0x6}, - {0x264,4,0x100,0x100,0x17}, - {0x2BC,5,0x2BC,0x12C,0x6}, - {0x3A5,4,0x3A5,0x3A5,0x7}, - {0x26C,6,0x26C,0x26C,0x7}, - {0x200,8,0x200,0x100,0x17}, - {0x400,5,0x400,0x400,0x7}, - {0x400,6,0x400,0x400,0x7}, - {0x264,8,0x264,0x200,0x17}, - {0x33E,7,0x33E,0x200,0x7}}; - u_char n; - - n = inb(XD_JUMPER) & 0x0F; /* BIOS's drive number: same geometry - is assumed for BOTH drives */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - xd_info[drive].control = geometry_table[n][4]; /* control byte */ - xd_setparam(CMD_XBSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -/* xd_override_init_drive: this finds disk geometry in a "binary search" style, narrowing in on the "correct" number of heads - etc. by trying values until it gets the highest successful value. Idea courtesy Salvador Abreu (spa@fct.unl.pt). */ -static void __init xd_override_init_drive (u_char drive) -{ - u_short min[] = { 0,0,0 },max[] = { 16,1024,64 },test[] = { 0,0,0 }; - u_char cmdblk[6],i; - - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - for (i = 0; i < 3; i++) { - while (min[i] != max[i] - 1) { - test[i] = (min[i] + max[i]) / 2; - xd_build(cmdblk,CMD_SEEK,drive,(u_char) test[0],(u_short) test[1],(u_char) test[2],0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - min[i] = test[i]; - else - max[i] = test[i]; - } - test[i] = min[i]; - } - xd_info[drive].heads = (u_char) min[0] + 1; - xd_info[drive].cylinders = (u_short) min[1] + 1; - xd_info[drive].sectors = (u_char) min[2] + 1; - } - xd_info[drive].control = 0; -} - -/* xd_setup: initialise controller from command line parameters */ -static void __init do_xd_setup (int *integers) -{ - switch (integers[0]) { - case 4: if (integers[4] < 0) - nodma = 1; - else if (integers[4] < 8) - xd_dma = integers[4]; - case 3: if ((integers[3] > 0) && (integers[3] <= 0x3FC)) - xd_iobase = integers[3]; - case 2: if ((integers[2] > 0) && (integers[2] < 16)) - xd_irq = integers[2]; - case 1: xd_override = 1; - if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs))) - xd_type = integers[1]; - case 0: break; - default:printk("xd: too many parameters for xd\n"); - } - xd_maxsectors = 0x01; -} - -/* xd_setparam: set the drive characteristics */ -static void __init xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc) -{ - u_char cmdblk[14]; - - xd_build(cmdblk,command,drive,0,0,0,0,0); - cmdblk[6] = (u_char) (cylinders >> 8) & 0x03; - cmdblk[7] = (u_char) (cylinders & 0xFF); - cmdblk[8] = heads & 0x1F; - cmdblk[9] = (u_char) (rwrite >> 8) & 0x03; - cmdblk[10] = (u_char) (rwrite & 0xFF); - cmdblk[11] = (u_char) (wprecomp >> 8) & 0x03; - cmdblk[12] = (u_char) (wprecomp & 0xFF); - cmdblk[13] = ecc; - - /* Some controllers require geometry info as data, not command */ - - if (xd_command(cmdblk,PIO_MODE,NULL,&cmdblk[6],NULL,XD_TIMEOUT * 2)) - printk("xd: error setting characteristics for xd%c\n", 'a'+drive); -} - - -#ifdef MODULE - -module_param_array(xd, int, NULL, 0); -module_param_array(xd_geo, int, NULL, 0); -module_param(nodma, bool, 0); - -MODULE_LICENSE("GPL"); - -void cleanup_module(void) -{ - int i; - unregister_blkdev(XT_DISK_MAJOR, "xd"); - for (i = 0; i < xd_drives; i++) { - del_gendisk(xd_gendisk[i]); - put_disk(xd_gendisk[i]); - } - blk_cleanup_queue(xd_queue); - release_region(xd_iobase,4); - if (xd_drives) { - free_irq(xd_irq, NULL); - free_dma(xd_dma); - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); - } -} -#else - -static int __init xd_setup (char *str) -{ - int ints[5]; - get_options (str, ARRAY_SIZE (ints), ints); - do_xd_setup (ints); - return 1; -} - -/* xd_manual_geo_init: initialise drive geometry from command line parameters - (used only for WD drives) */ -static int __init xd_manual_geo_init (char *str) -{ - int i, integers[1 + 3*XD_MAXDRIVES]; - - get_options (str, ARRAY_SIZE (integers), integers); - if (integers[0]%3 != 0) { - printk("xd: incorrect number of parameters for xd_geo\n"); - return 1; - } - for (i = 0; (i < integers[0]) && (i < 3*XD_MAXDRIVES); i++) - xd_geo[i] = integers[i+1]; - return 1; -} - -__setup ("xd=", xd_setup); -__setup ("xd_geo=", xd_manual_geo_init); - -#endif /* MODULE */ - -module_init(xd_init); -MODULE_ALIAS_BLOCKDEV_MAJOR(XT_DISK_MAJOR); diff --git a/drivers/block/xd.h b/drivers/block/xd.h deleted file mode 100644 index 37cacef..0000000 --- a/drivers/block/xd.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef _LINUX_XD_H -#define _LINUX_XD_H - -/* - * This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X). - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst. - */ - -#include <linux/interrupt.h> - -/* XT hard disk controller registers */ -#define XD_DATA (xd_iobase + 0x00) /* data RW register */ -#define XD_RESET (xd_iobase + 0x01) /* reset WO register */ -#define XD_STATUS (xd_iobase + 0x01) /* status RO register */ -#define XD_SELECT (xd_iobase + 0x02) /* select WO register */ -#define XD_JUMPER (xd_iobase + 0x02) /* jumper RO register */ -#define XD_CONTROL (xd_iobase + 0x03) /* DMAE/INTE WO register */ -#define XD_RESERVED (xd_iobase + 0x03) /* reserved */ - -/* XT hard disk controller commands (incomplete list) */ -#define CMD_TESTREADY 0x00 /* test drive ready */ -#define CMD_RECALIBRATE 0x01 /* recalibrate drive */ -#define CMD_SENSE 0x03 /* request sense */ -#define CMD_FORMATDRV 0x04 /* format drive */ -#define CMD_VERIFY 0x05 /* read verify */ -#define CMD_FORMATTRK 0x06 /* format track */ -#define CMD_FORMATBAD 0x07 /* format bad track */ -#define CMD_READ 0x08 /* read */ -#define CMD_WRITE 0x0A /* write */ -#define CMD_SEEK 0x0B /* seek */ - -/* Controller specific commands */ -#define CMD_DTCSETPARAM 0x0C /* set drive parameters (DTC 5150X & CX only?) */ -#define CMD_DTCGETECC 0x0D /* get ecc error length (DTC 5150X only?) */ -#define CMD_DTCREADBUF 0x0E /* read sector buffer (DTC 5150X only?) */ -#define CMD_DTCWRITEBUF 0x0F /* write sector buffer (DTC 5150X only?) */ -#define CMD_DTCREMAPTRK 0x11 /* assign alternate track (DTC 5150X only?) */ -#define CMD_DTCGETPARAM 0xFB /* get drive parameters (DTC 5150X only?) */ -#define CMD_DTCSETSTEP 0xFC /* set step rate (DTC 5150X only?) */ -#define CMD_DTCSETGEOM 0xFE /* set geometry data (DTC 5150X only?) */ -#define CMD_DTCGETGEOM 0xFF /* get geometry data (DTC 5150X only?) */ -#define CMD_ST11GETGEOM 0xF8 /* get geometry data (Seagate ST11R/M only?) */ -#define CMD_WDSETPARAM 0x0C /* set drive parameters (WD 1004A27X only?) */ -#define CMD_XBSETPARAM 0x0C /* set drive parameters (XEBEC only?) */ - -/* Bits for command status byte */ -#define CSB_ERROR 0x02 /* error */ -#define CSB_LUN 0x20 /* logical Unit Number */ - -/* XT hard disk controller status bits */ -#define STAT_READY 0x01 /* controller is ready */ -#define STAT_INPUT 0x02 /* data flowing from controller to host */ -#define STAT_COMMAND 0x04 /* controller in command phase */ -#define STAT_SELECT 0x08 /* controller is selected */ -#define STAT_REQUEST 0x10 /* controller requesting data */ -#define STAT_INTERRUPT 0x20 /* controller requesting interrupt */ - -/* XT hard disk controller control bits */ -#define PIO_MODE 0x00 /* control bits to set for PIO */ -#define DMA_MODE 0x03 /* control bits to set for DMA & interrupt */ - -#define XD_MAXDRIVES 2 /* maximum 2 drives */ -#define XD_TIMEOUT HZ /* 1 second timeout */ -#define XD_RETRIES 4 /* maximum 4 retries */ - -#undef DEBUG /* define for debugging output */ - -#ifdef DEBUG - #define DEBUG_STARTUP /* debug driver initialisation */ - #define DEBUG_OVERRIDE /* debug override geometry detection */ - #define DEBUG_READWRITE /* debug each read/write command */ - #define DEBUG_OTHER /* debug misc. interrupt/DMA stuff */ - #define DEBUG_COMMAND /* debug each controller command */ -#endif /* DEBUG */ - -/* this structure defines the XT drives and their types */ -typedef struct { - u_char heads; - u_short cylinders; - u_char sectors; - u_char control; - int unit; -} XD_INFO; - -/* this structure defines a ROM BIOS signature */ -typedef struct { - unsigned int offset; - const char *string; - void (*init_controller)(unsigned int address); - void (*init_drive)(u_char drive); - const char *name; -} XD_SIGNATURE; - -#ifndef MODULE -static int xd_manual_geo_init (char *command); -#endif /* MODULE */ -static u_char xd_detect (u_char *controller, unsigned int *address); -static u_char xd_initdrives (void (*init_drive)(u_char drive)); - -static void do_xd_request (struct request_queue * q); -static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg); -static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count); -static void xd_recalibrate (u_char drive); - -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id); -static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count); -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control); -static void xd_watchdog (unsigned long unused); -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout); -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout); - -/* card specific setup and geometry gathering code */ -static void xd_dtc_init_controller (unsigned int address); -static void xd_dtc5150cx_init_drive (u_char drive); -static void xd_dtc_init_drive (u_char drive); -static void xd_wd_init_controller (unsigned int address); -static void xd_wd_init_drive (u_char drive); -static void xd_seagate_init_controller (unsigned int address); -static void xd_seagate_init_drive (u_char drive); -static void xd_omti_init_controller (unsigned int address); -static void xd_omti_init_drive (u_char drive); -static void xd_xebec_init_controller (unsigned int address); -static void xd_xebec_init_drive (u_char drive); -static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc); -static void xd_override_init_drive (u_char drive); - -#endif /* _LINUX_XD_H */ diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5ac841f..de1f319 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -46,6 +46,7 @@ #include <xen/xen.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/balloon.h> #include "common.h" /* @@ -239,6 +240,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); + free_xenballooned_pages(segs_to_unmap, pages); segs_to_unmap = 0; } @@ -527,8 +529,8 @@ static int xen_blkbk_map(struct blkif_request *req, GFP_KERNEL); if (!persistent_gnt) return -ENOMEM; - persistent_gnt->page = alloc_page(GFP_KERNEL); - if (!persistent_gnt->page) { + if (alloc_xenballooned_pages(1, &persistent_gnt->page, + false)) { kfree(persistent_gnt); return -ENOMEM; } @@ -879,7 +881,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, goto fail_response; } - preq.dev = req->u.rw.handle; preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6398072..5e237f6 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -367,6 +367,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) be->blkif = NULL; } + kfree(be->mode); kfree(be); dev_set_drvdata(&dev->dev, NULL); return 0; @@ -502,6 +503,7 @@ static void backend_changed(struct xenbus_watch *watch, = container_of(watch, struct backend_info, backend_watch); struct xenbus_device *dev = be->dev; int cdrom = 0; + unsigned long handle; char *device_type; DPRINTK(""); @@ -521,10 +523,10 @@ static void backend_changed(struct xenbus_watch *watch, return; } - if ((be->major || be->minor) && - ((be->major != major) || (be->minor != minor))) { - pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", - be->major, be->minor, major, minor); + if (be->major | be->minor) { + if (be->major != major || be->minor != minor) + pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", + be->major, be->minor, major, minor); return; } @@ -542,36 +544,33 @@ static void backend_changed(struct xenbus_watch *watch, kfree(device_type); } - if (be->major == 0 && be->minor == 0) { - /* Front end dir is a number, which is used as the handle. */ - - char *p = strrchr(dev->otherend, '/') + 1; - long handle; - err = strict_strtoul(p, 0, &handle); - if (err) - return; + /* Front end dir is a number, which is used as the handle. */ + err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); + if (err) + return; - be->major = major; - be->minor = minor; + be->major = major; + be->minor = minor; - err = xen_vbd_create(be->blkif, handle, major, minor, - (NULL == strchr(be->mode, 'w')), cdrom); - if (err) { - be->major = 0; - be->minor = 0; - xenbus_dev_fatal(dev, err, "creating vbd structure"); - return; - } + err = xen_vbd_create(be->blkif, handle, major, minor, + !strchr(be->mode, 'w'), cdrom); + if (err) + xenbus_dev_fatal(dev, err, "creating vbd structure"); + else { err = xenvbd_sysfs_addif(dev); if (err) { xen_vbd_free(&be->blkif->vbd); - be->major = 0; - be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); - return; } + } + if (err) { + kfree(be->mode); + be->mode = NULL; + be->major = 0; + be->minor = 0; + } else { /* We're potentially connected now */ xen_update_blkif_status(be->blkif); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 11043c1..c3dae2e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { struct llist_node *all_gnts; - struct grant *persistent_gnt; + struct grant *persistent_gnt, *tmp; struct llist_node *n; /* Prevent new requests being issued until we fix things up. */ @@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { all_gnts = llist_del_all(&info->persistent_gnts); - llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { + persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); + while (persistent_gnt) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); - kfree(persistent_gnt); + tmp = persistent_gnt; + n = persistent_gnt->node.next; + if (n) + persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); + else + persistent_gnt = NULL; + kfree(tmp); } info->persistent_gnts_c = 0; } diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 1c7fdcd..0ac9b45 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1208,6 +1208,16 @@ static int smi_num; /* Used to sequence the SMIs */ #define DEFAULT_REGSPACING 1 #define DEFAULT_REGSIZE 1 +#ifdef CONFIG_ACPI +static bool si_tryacpi = 1; +#endif +#ifdef CONFIG_DMI +static bool si_trydmi = 1; +#endif +static bool si_tryplatform = 1; +#ifdef CONFIG_PCI +static bool si_trypci = 1; +#endif static bool si_trydefaults = 1; static char *si_type[SI_MAX_PARMS]; #define MAX_SI_TYPE_STR 30 @@ -1238,6 +1248,25 @@ MODULE_PARM_DESC(hotmod, "Add and remove interfaces. See" " Documentation/IPMI.txt in the kernel sources for the" " gory details."); +#ifdef CONFIG_ACPI +module_param_named(tryacpi, si_tryacpi, bool, 0); +MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the" + " default scan of the interfaces identified via ACPI"); +#endif +#ifdef CONFIG_DMI +module_param_named(trydmi, si_trydmi, bool, 0); +MODULE_PARM_DESC(trydmi, "Setting this to zero will disable the" + " default scan of the interfaces identified via DMI"); +#endif +module_param_named(tryplatform, si_tryplatform, bool, 0); +MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the" + " default scan of the interfaces identified via platform" + " interfaces like openfirmware"); +#ifdef CONFIG_PCI +module_param_named(trypci, si_trypci, bool, 0); +MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the" + " default scan of the interfaces identified via pci"); +#endif module_param_named(trydefaults, si_trydefaults, bool, 0); MODULE_PARM_DESC(trydefaults, "Setting this to 'false' will disable the" " default scan of the KCS and SMIC interface at the standard" @@ -3371,13 +3400,15 @@ static int init_ipmi_si(void) return 0; initialized = 1; - rv = platform_driver_register(&ipmi_driver); - if (rv) { - printk(KERN_ERR PFX "Unable to register driver: %d\n", rv); - return rv; + if (si_tryplatform) { + rv = platform_driver_register(&ipmi_driver); + if (rv) { + printk(KERN_ERR PFX "Unable to register " + "driver: %d\n", rv); + return rv; + } } - /* Parse out the si_type string into its components. */ str = si_type_str; if (*str != '\0') { @@ -3400,24 +3431,31 @@ static int init_ipmi_si(void) return 0; #ifdef CONFIG_PCI - rv = pci_register_driver(&ipmi_pci_driver); - if (rv) - printk(KERN_ERR PFX "Unable to register PCI driver: %d\n", rv); - else - pci_registered = 1; + if (si_trypci) { + rv = pci_register_driver(&ipmi_pci_driver); + if (rv) + printk(KERN_ERR PFX "Unable to register " + "PCI driver: %d\n", rv); + else + pci_registered = 1; + } #endif #ifdef CONFIG_ACPI - pnp_register_driver(&ipmi_pnp_driver); - pnp_registered = 1; + if (si_tryacpi) { + pnp_register_driver(&ipmi_pnp_driver); + pnp_registered = 1; + } #endif #ifdef CONFIG_DMI - dmi_find_bmc(); + if (si_trydmi) + dmi_find_bmc(); #endif #ifdef CONFIG_ACPI - spmi_find_bmc(); + if (si_tryacpi) + spmi_find_bmc(); #endif /* We prefer devices with interrupts, but in the case of a machine diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 522136d..190d442 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -183,19 +183,12 @@ static const struct file_operations misc_fops = { int misc_register(struct miscdevice * misc) { - struct miscdevice *c; dev_t dev; int err = 0; INIT_LIST_HEAD(&misc->list); mutex_lock(&misc_mtx); - list_for_each_entry(c, &misc_list, list) { - if (c->minor == misc->minor) { - mutex_unlock(&misc_mtx); - return -EBUSY; - } - } if (misc->minor == MISC_DYNAMIC_MINOR) { int i = find_first_zero_bit(misc_minors, DYNAMIC_MINORS); @@ -205,6 +198,15 @@ int misc_register(struct miscdevice * misc) } misc->minor = DYNAMIC_MINORS - i - 1; set_bit(i, misc_minors); + } else { + struct miscdevice *c; + + list_for_each_entry(c, &misc_list, list) { + if (c->minor == misc->minor) { + mutex_unlock(&misc_mtx); + return -EBUSY; + } + } } dev = MKDEV(MISC_MAJOR, misc->minor); diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index fabbfe1..ed87b24 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -52,31 +52,29 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk *c, int level) { struct clk *child; - struct hlist_node *tmp; if (!c) return; clk_summary_show_one(s, c, level); - hlist_for_each_entry(child, tmp, &c->children, child_node) + hlist_for_each_entry(child, &c->children, child_node) clk_summary_show_subtree(s, child, level + 1); } static int clk_summary_show(struct seq_file *s, void *data) { struct clk *c; - struct hlist_node *tmp; seq_printf(s, " clock enable_cnt prepare_cnt rate\n"); seq_printf(s, "---------------------------------------------------------------------\n"); mutex_lock(&prepare_lock); - hlist_for_each_entry(c, tmp, &clk_root_list, child_node) + hlist_for_each_entry(c, &clk_root_list, child_node) clk_summary_show_subtree(s, c, 0); - hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(c, &clk_orphan_list, child_node) clk_summary_show_subtree(s, c, 0); mutex_unlock(&prepare_lock); @@ -111,14 +109,13 @@ static void clk_dump_one(struct seq_file *s, struct clk *c, int level) static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level) { struct clk *child; - struct hlist_node *tmp; if (!c) return; clk_dump_one(s, c, level); - hlist_for_each_entry(child, tmp, &c->children, child_node) { + hlist_for_each_entry(child, &c->children, child_node) { seq_printf(s, ","); clk_dump_subtree(s, child, level + 1); } @@ -129,21 +126,20 @@ static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level) static int clk_dump(struct seq_file *s, void *data) { struct clk *c; - struct hlist_node *tmp; bool first_node = true; seq_printf(s, "{"); mutex_lock(&prepare_lock); - hlist_for_each_entry(c, tmp, &clk_root_list, child_node) { + hlist_for_each_entry(c, &clk_root_list, child_node) { if (!first_node) seq_printf(s, ","); first_node = false; clk_dump_subtree(s, c, 0); } - hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node) { + hlist_for_each_entry(c, &clk_orphan_list, child_node) { seq_printf(s, ","); clk_dump_subtree(s, c, 0); } @@ -222,7 +218,6 @@ out: static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) { struct clk *child; - struct hlist_node *tmp; int ret = -EINVAL;; if (!clk || !pdentry) @@ -233,7 +228,7 @@ static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) if (ret) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_debug_create_subtree(child, clk->dentry); ret = 0; @@ -299,7 +294,6 @@ out: static int __init clk_debug_init(void) { struct clk *clk; - struct hlist_node *tmp; struct dentry *d; rootdir = debugfs_create_dir("clk", NULL); @@ -324,10 +318,10 @@ static int __init clk_debug_init(void) mutex_lock(&prepare_lock); - hlist_for_each_entry(clk, tmp, &clk_root_list, child_node) + hlist_for_each_entry(clk, &clk_root_list, child_node) clk_debug_create_subtree(clk, rootdir); - hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(clk, &clk_orphan_list, child_node) clk_debug_create_subtree(clk, orphandir); inited = 1; @@ -345,13 +339,12 @@ static inline int clk_debug_register(struct clk *clk) { return 0; } static void clk_disable_unused_subtree(struct clk *clk) { struct clk *child; - struct hlist_node *tmp; unsigned long flags; if (!clk) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_disable_unused_subtree(child); spin_lock_irqsave(&enable_lock, flags); @@ -384,14 +377,13 @@ out: static int clk_disable_unused(void) { struct clk *clk; - struct hlist_node *tmp; mutex_lock(&prepare_lock); - hlist_for_each_entry(clk, tmp, &clk_root_list, child_node) + hlist_for_each_entry(clk, &clk_root_list, child_node) clk_disable_unused_subtree(clk); - hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(clk, &clk_orphan_list, child_node) clk_disable_unused_subtree(clk); mutex_unlock(&prepare_lock); @@ -484,12 +476,11 @@ static struct clk *__clk_lookup_subtree(const char *name, struct clk *clk) { struct clk *child; struct clk *ret; - struct hlist_node *tmp; if (!strcmp(clk->name, name)) return clk; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { ret = __clk_lookup_subtree(name, child); if (ret) return ret; @@ -502,20 +493,19 @@ struct clk *__clk_lookup(const char *name) { struct clk *root_clk; struct clk *ret; - struct hlist_node *tmp; if (!name) return NULL; /* search the 'proper' clk tree first */ - hlist_for_each_entry(root_clk, tmp, &clk_root_list, child_node) { + hlist_for_each_entry(root_clk, &clk_root_list, child_node) { ret = __clk_lookup_subtree(name, root_clk); if (ret) return ret; } /* if not found, then search the orphan tree */ - hlist_for_each_entry(root_clk, tmp, &clk_orphan_list, child_node) { + hlist_for_each_entry(root_clk, &clk_orphan_list, child_node) { ret = __clk_lookup_subtree(name, root_clk); if (ret) return ret; @@ -812,7 +802,6 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg) { unsigned long old_rate; unsigned long parent_rate = 0; - struct hlist_node *tmp; struct clk *child; old_rate = clk->rate; @@ -832,7 +821,7 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg) if (clk->notifier_count && msg) __clk_notify(clk, msg, old_rate, clk->rate); - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) __clk_recalc_rates(child, msg); } @@ -878,7 +867,6 @@ EXPORT_SYMBOL_GPL(clk_get_rate); */ static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate) { - struct hlist_node *tmp; struct clk *child; unsigned long new_rate; int ret = NOTIFY_DONE; @@ -895,7 +883,7 @@ static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate) if (ret == NOTIFY_BAD) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { ret = __clk_speculate_rates(child, new_rate); if (ret == NOTIFY_BAD) break; @@ -908,11 +896,10 @@ out: static void clk_calc_subtree(struct clk *clk, unsigned long new_rate) { struct clk *child; - struct hlist_node *tmp; clk->new_rate = new_rate; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { if (child->ops->recalc_rate) child->new_rate = child->ops->recalc_rate(child->hw, new_rate); else @@ -983,7 +970,6 @@ out: */ static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long event) { - struct hlist_node *tmp; struct clk *child, *fail_clk = NULL; int ret = NOTIFY_DONE; @@ -996,7 +982,7 @@ static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long even fail_clk = clk; } - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { clk = clk_propagate_rate_change(child, event); if (clk) fail_clk = clk; @@ -1014,7 +1000,6 @@ static void clk_change_rate(struct clk *clk) struct clk *child; unsigned long old_rate; unsigned long best_parent_rate = 0; - struct hlist_node *tmp; old_rate = clk->rate; @@ -1032,7 +1017,7 @@ static void clk_change_rate(struct clk *clk) if (clk->notifier_count && old_rate != clk->rate) __clk_notify(clk, POST_RATE_CHANGE, old_rate, clk->rate); - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_change_rate(child); } @@ -1348,7 +1333,7 @@ int __clk_init(struct device *dev, struct clk *clk) { int i, ret = 0; struct clk *orphan; - struct hlist_node *tmp, *tmp2; + struct hlist_node *tmp2; if (!clk) return -EINVAL; @@ -1448,7 +1433,7 @@ int __clk_init(struct device *dev, struct clk *clk) * walk the list of orphan clocks and reparent any that are children of * this clock */ - hlist_for_each_entry_safe(orphan, tmp, tmp2, &clk_orphan_list, child_node) { + hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) { if (orphan->ops->get_parent) { i = orphan->ops->get_parent(orphan->hw); if (!strcmp(clk->name, orphan->parent_names[i])) diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index a4605fd..47a6730 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -27,8 +27,10 @@ #include <linux/of_address.h> #include <linux/irq.h> #include <linux/module.h> -#include <asm/sched_clock.h> +#include <asm/sched_clock.h> +#include <asm/localtimer.h> +#include <linux/percpu.h> /* * Timer block registers. */ @@ -49,6 +51,7 @@ #define TIMER1_RELOAD_OFF 0x0018 #define TIMER1_VAL_OFF 0x001c +#define LCL_TIMER_EVENTS_STATUS 0x0028 /* Global timers are connected to the coherency fabric clock, and the below divider reduces their incrementing frequency. */ #define TIMER_DIVIDER_SHIFT 5 @@ -57,14 +60,17 @@ /* * SoC-specific data. */ -static void __iomem *timer_base; -static int timer_irq; +static void __iomem *timer_base, *local_base; +static unsigned int timer_clk; +static bool timer25Mhz = true; /* * Number of timer ticks per jiffy. */ static u32 ticks_per_jiffy; +static struct clock_event_device __percpu **percpu_armada_370_xp_evt; + static u32 notrace armada_370_xp_read_sched_clock(void) { return ~readl(timer_base + TIMER0_VAL_OFF); @@ -78,24 +84,23 @@ armada_370_xp_clkevt_next_event(unsigned long delta, struct clock_event_device *dev) { u32 u; - /* * Clear clockevent timer interrupt. */ - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); /* * Setup new clockevent timer value. */ - writel(delta, timer_base + TIMER1_VAL_OFF); + writel(delta, local_base + TIMER0_VAL_OFF); /* * Enable the timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - u = ((u & ~TIMER1_RELOAD_EN) | TIMER1_EN | - TIMER1_DIV(TIMER_DIVIDER_SHIFT)); - writel(u, timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + u = ((u & ~TIMER0_RELOAD_EN) | TIMER0_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)); + writel(u, local_base + TIMER_CTRL_OFF); return 0; } @@ -107,37 +112,38 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode, u32 u; if (mode == CLOCK_EVT_MODE_PERIODIC) { + /* * Setup timer to fire at 1/HZ intervals. */ - writel(ticks_per_jiffy - 1, timer_base + TIMER1_RELOAD_OFF); - writel(ticks_per_jiffy - 1, timer_base + TIMER1_VAL_OFF); + writel(ticks_per_jiffy - 1, local_base + TIMER0_RELOAD_OFF); + writel(ticks_per_jiffy - 1, local_base + TIMER0_VAL_OFF); /* * Enable timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel((u | TIMER1_EN | TIMER1_RELOAD_EN | - TIMER1_DIV(TIMER_DIVIDER_SHIFT)), - timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + + writel((u | TIMER0_EN | TIMER0_RELOAD_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)), + local_base + TIMER_CTRL_OFF); } else { /* * Disable timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~TIMER1_EN, timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + writel(u & ~TIMER0_EN, local_base + TIMER_CTRL_OFF); /* * ACK pending timer interrupt. */ - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); - + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); } } static struct clock_event_device armada_370_xp_clkevt = { - .name = "armada_370_xp_tick", + .name = "armada_370_xp_per_cpu_tick", .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .shift = 32, .rating = 300, @@ -150,32 +156,78 @@ static irqreturn_t armada_370_xp_timer_interrupt(int irq, void *dev_id) /* * ACK timer interrupt and call event handler. */ + struct clock_event_device *evt = *(struct clock_event_device **)dev_id; - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); - armada_370_xp_clkevt.event_handler(&armada_370_xp_clkevt); + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); + evt->event_handler(evt); return IRQ_HANDLED; } -static struct irqaction armada_370_xp_timer_irq = { - .name = "armada_370_xp_tick", - .flags = IRQF_DISABLED | IRQF_TIMER, - .handler = armada_370_xp_timer_interrupt +/* + * Setup the local clock events for a CPU. + */ +static int __cpuinit armada_370_xp_timer_setup(struct clock_event_device *evt) +{ + u32 u; + int cpu = smp_processor_id(); + + /* Use existing clock_event for cpu 0 */ + if (!smp_processor_id()) + return 0; + + u = readl(local_base + TIMER_CTRL_OFF); + if (timer25Mhz) + writel(u | TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + else + writel(u & ~TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + + evt->name = armada_370_xp_clkevt.name; + evt->irq = armada_370_xp_clkevt.irq; + evt->features = armada_370_xp_clkevt.features; + evt->shift = armada_370_xp_clkevt.shift; + evt->rating = armada_370_xp_clkevt.rating, + evt->set_next_event = armada_370_xp_clkevt_next_event, + evt->set_mode = armada_370_xp_clkevt_mode, + evt->cpumask = cpumask_of(cpu); + + *__this_cpu_ptr(percpu_armada_370_xp_evt) = evt; + + clockevents_config_and_register(evt, timer_clk, 1, 0xfffffffe); + enable_percpu_irq(evt->irq, 0); + + return 0; +} + +static void armada_370_xp_timer_stop(struct clock_event_device *evt) +{ + evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); + disable_percpu_irq(evt->irq); +} + +static struct local_timer_ops armada_370_xp_local_timer_ops __cpuinitdata = { + .setup = armada_370_xp_timer_setup, + .stop = armada_370_xp_timer_stop, }; void __init armada_370_xp_timer_init(void) { u32 u; struct device_node *np; - unsigned int timer_clk; + int res; + np = of_find_compatible_node(NULL, NULL, "marvell,armada-370-xp-timer"); timer_base = of_iomap(np, 0); WARN_ON(!timer_base); + local_base = of_iomap(np, 1); if (of_find_property(np, "marvell,timer-25Mhz", NULL)) { /* The fixed 25MHz timer is available so let's use it */ + u = readl(local_base + TIMER_CTRL_OFF); + writel(u | TIMER0_25MHZ, + local_base + TIMER_CTRL_OFF); u = readl(timer_base + TIMER_CTRL_OFF); - writel(u | TIMER0_25MHZ | TIMER1_25MHZ, + writel(u | TIMER0_25MHZ, timer_base + TIMER_CTRL_OFF); timer_clk = 25000000; } else { @@ -183,15 +235,23 @@ void __init armada_370_xp_timer_init(void) struct clk *clk = of_clk_get(np, 0); WARN_ON(IS_ERR(clk)); rate = clk_get_rate(clk); + u = readl(local_base + TIMER_CTRL_OFF); + writel(u & ~(TIMER0_25MHZ), + local_base + TIMER_CTRL_OFF); + u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~(TIMER0_25MHZ | TIMER1_25MHZ), + writel(u & ~(TIMER0_25MHZ), timer_base + TIMER_CTRL_OFF); + timer_clk = rate / TIMER_DIVIDER; + timer25Mhz = false; } - /* We use timer 0 as clocksource, and timer 1 for - clockevents */ - timer_irq = irq_of_parse_and_map(np, 1); + /* + * We use timer 0 as clocksource, and private(local) timer 0 + * for clockevents + */ + armada_370_xp_clkevt.irq = irq_of_parse_and_map(np, 4); ticks_per_jiffy = (timer_clk + HZ / 2) / HZ; @@ -216,12 +276,26 @@ void __init armada_370_xp_timer_init(void) "armada_370_xp_clocksource", timer_clk, 300, 32, clocksource_mmio_readl_down); - /* - * Setup clockevent timer (interrupt-driven). - */ - setup_irq(timer_irq, &armada_370_xp_timer_irq); + /* Register the clockevent on the private timer of CPU 0 */ armada_370_xp_clkevt.cpumask = cpumask_of(0); clockevents_config_and_register(&armada_370_xp_clkevt, timer_clk, 1, 0xfffffffe); -} + percpu_armada_370_xp_evt = alloc_percpu(struct clock_event_device *); + + + /* + * Setup clockevent timer (interrupt-driven). + */ + *__this_cpu_ptr(percpu_armada_370_xp_evt) = &armada_370_xp_clkevt; + res = request_percpu_irq(armada_370_xp_clkevt.irq, + armada_370_xp_timer_interrupt, + armada_370_xp_clkevt.name, + percpu_armada_370_xp_evt); + if (!res) { + enable_percpu_irq(armada_370_xp_clkevt.irq, 0); +#ifdef CONFIG_LOCAL_TIMERS + local_timer_register(&armada_370_xp_local_timer_ops); +#endif + } +} diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c index 591b659..126cf29 100644 --- a/drivers/dca/dca-sysfs.c +++ b/drivers/dca/dca-sysfs.c @@ -53,22 +53,19 @@ void dca_sysfs_remove_req(struct dca_provider *dca, int slot) int dca_sysfs_add_provider(struct dca_provider *dca, struct device *dev) { struct device *cd; - int err = 0; + int ret; -idr_try_again: - if (!idr_pre_get(&dca_idr, GFP_KERNEL)) - return -ENOMEM; + idr_preload(GFP_KERNEL); spin_lock(&dca_idr_lock); - err = idr_get_new(&dca_idr, dca, &dca->id); + + ret = idr_alloc(&dca_idr, dca, 0, 0, GFP_NOWAIT); + if (ret >= 0) + dca->id = ret; + spin_unlock(&dca_idr_lock); - switch (err) { - case 0: - break; - case -EAGAIN: - goto idr_try_again; - default: - return err; - } + idr_preload_end(); + if (ret < 0) + return ret; cd = device_create(dca_class, dev, MKDEV(0, 0), NULL, "dca%d", dca->id); if (IS_ERR(cd)) { diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 242b8c0..b2728d6 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -686,18 +686,14 @@ static int get_dma_id(struct dma_device *device) { int rc; - idr_retry: - if (!idr_pre_get(&dma_idr, GFP_KERNEL)) - return -ENOMEM; mutex_lock(&dma_list_mutex); - rc = idr_get_new(&dma_idr, NULL, &device->dev_id); - mutex_unlock(&dma_list_mutex); - if (rc == -EAGAIN) - goto idr_retry; - else if (rc != 0) - return rc; - return 0; + rc = idr_alloc(&dma_idr, NULL, 0, 0, GFP_KERNEL); + if (rc >= 0) + device->dev_id = rc; + + mutex_unlock(&dma_list_mutex); + return rc < 0 ? rc : 0; } /** diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index acb709b..e443f2c1 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -80,6 +80,29 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. +config EDAC_GHES + bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) + default y + help + Not all machines support hardware-driven error report. Some of those + provide a BIOS-driven error report mechanism via ACPI, using the + APEI/GHES driver. By enabling this option, the error reports provided + by GHES are sent to userspace via the EDAC API. + + When this option is enabled, it will disable the hardware-driven + mechanisms, if a GHES BIOS is detected, entering into the + "Firmware First" mode. + + It should be noticed that keeping both GHES and a hardware-driven + error mechanism won't work well, as BIOS will race with OS, while + reading the error registers. So, if you want to not use "Firmware + first" GHES error mechanism, you should disable GHES either at + compilation time or by passing "ghes.disable=1" Kernel parameter + at boot time. + + In doubt, say 'Y'. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h" depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 5608a9b..4154ed6 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -16,6 +16,7 @@ ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif +obj-$(CONFIG_EDAC_GHES) += ghes_edac.o obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 23bb99f..3c2625e 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page); + +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e); + void edac_mc_handle_error(const enum hw_event_mc_err_type type, struct mem_ctl_info *mci, const u16 error_count, diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index d1e9eb1..cdb81aa 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -42,6 +42,12 @@ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); +/* + * Used to lock EDAC MC to just one module, avoiding two drivers e. g. + * apei/ghes and i7core_edac to be used at the same time. + */ +static void const *edac_mc_owner; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -441,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, mci->op_state = OP_ALLOC; - /* at this point, the root kobj is valid, and in order to - * 'free' the object, then the function: - * edac_mc_unregister_sysfs_main_kobj() must be called - * which will perform kobj unregistration and the actual free - * will occur during the kobject callback operation - */ - return mci; error: @@ -666,9 +665,9 @@ fail1: return 1; } -static void del_mc_from_global_list(struct mem_ctl_info *mci) +static int del_mc_from_global_list(struct mem_ctl_info *mci) { - atomic_dec(&edac_handlers); + int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -676,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ synchronize_rcu(); INIT_LIST_HEAD(&mci->link); + + return handlers; } /** @@ -719,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find); /* FIXME - should a warning be printed if no error detection? correction? */ int edac_mc_add_mc(struct mem_ctl_info *mci) { + int ret = -EINVAL; edac_dbg(0, "\n"); #ifdef CONFIG_EDAC_DEBUG @@ -749,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) #endif mutex_lock(&mem_ctls_mutex); + if (edac_mc_owner && edac_mc_owner != mci->mod_name) { + ret = -EPERM; + goto fail0; + } + if (add_mc_to_global_list(mci)) goto fail0; @@ -775,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_owner = mci->mod_name; + mutex_unlock(&mem_ctls_mutex); return 0; @@ -783,7 +792,7 @@ fail1: fail0: mutex_unlock(&mem_ctls_mutex); - return 1; + return ret; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -809,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } - del_mc_from_global_list(mci); + if (!del_mc_from_global_list(mci)) + edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); /* flush workq processes */ @@ -907,6 +917,7 @@ const char *edac_layer_name[] = { [EDAC_MC_LAYER_CHANNEL] = "channel", [EDAC_MC_LAYER_SLOT] = "slot", [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", + [EDAC_MC_LAYER_ALL_MEM] = "memory", }; EXPORT_SYMBOL_GPL(edac_layer_name); @@ -1054,7 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci, edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -#define OTHER_LABEL " or " +/** + * edac_raw_mc_handle_error - reports a memory event to userspace without doing + * anything to discover the error location + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @e: error description + * + * This raw function is used internally by edac_mc_handle_error(). It should + * only be called directly when the hardware error come directly from BIOS, + * like in the case of APEI GHES driver. + */ +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e) +{ + char detail[80]; + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + + /* Memory type dependent details about the error */ + if (type == HW_EVENT_ERR_CORRECTED) { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", + e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome); + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report, + e->page_frame_number, e->offset_in_page, e->grain); + } else { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld", + e->page_frame_number, e->offset_in_page, e->grain); + + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report); + } + + +} +EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); /** * edac_mc_handle_error - reports a memory event to userspace @@ -1086,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { - /* FIXME: too much for stack: move it to some pre-alocated area */ - char detail[80], location[80]; - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; - int i; - long grain; - bool enable_per_layer_report = false; + int i, n_labels = 0; u8 grain_bits; + struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); + /* Fills the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = error_count; + e->top_layer = top_layer; + e->mid_layer = mid_layer; + e->low_layer = low_layer; + e->page_frame_number = page_frame_number; + e->offset_in_page = offset_in_page; + e->syndrome = syndrome; + e->msg = msg; + e->other_detail = other_detail; + /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be @@ -1121,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - enable_per_layer_report = true; + e->enable_per_layer_report = true; } /* @@ -1135,8 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ - grain = 0; - p = label; + p = e->label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { @@ -1150,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ - if (dimm->grain > grain) - grain = dimm->grain; + if (dimm->grain > e->grain) + e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to @@ -1159,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ - if (enable_per_layer_report && dimm->nr_pages) { - if (p != label) { + if (e->enable_per_layer_report && dimm->nr_pages) { + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; + } + n_labels++; + if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } @@ -1187,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } - if (!enable_per_layer_report) { - strcpy(label, "any memory"); + if (!e->enable_per_layer_report) { + strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - if (p == label) - strcpy(label, "unknown memory"); + if (p == e->label) + strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; @@ -1205,7 +1267,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ - p = location; + p = e->location; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) @@ -1215,32 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } - if (p > location) + if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ - grain_bits = fls_long(grain) + 1; - trace_mc_event(type, msg, label, error_count, - mci->mc_idx, top_layer, mid_layer, low_layer, - PAGES_TO_MiB(page_frame_number) | offset_in_page, - grain_bits, syndrome, other_detail); + grain_bits = fls_long(e->grain) + 1; + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); - /* Memory type dependent details about the error */ - if (type == HW_EVENT_ERR_CORRECTED) { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", - page_frame_number, offset_in_page, - grain, syndrome); - edac_ce_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report, - page_frame_number, offset_in_page, grain); - } else { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld", - page_frame_number, offset_in_page, grain); - - edac_ue_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report); - } + edac_raw_mc_handle_error(type, mci, e); } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0ca1ca7..4f4b613 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -7,7 +7,7 @@ * * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com * - * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com> + * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com> * The entire API were re-written, and ported to use struct device * */ @@ -429,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) + if (err < 0) { + edac_dbg(1, + "failure: create csrow objects for csrow %d\n", + i); goto error; + } } return 0; @@ -677,9 +681,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev, unsigned long bandwidth = 0; int new_bw = 0; - if (!mci->set_sdram_scrub_rate) - return -ENODEV; - if (strict_strtoul(data, 10, &bandwidth) < 0) return -EINVAL; @@ -703,9 +704,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); int bandwidth = 0; - if (!mci->get_sdram_scrub_rate) - return -ENODEV; - bandwidth = mci->get_sdram_scrub_rate(mci); if (bandwidth < 0) { edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); @@ -866,8 +864,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); /* memory scrubber attribute file */ -DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, - mci_sdram_scrub_rate_store); +DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); static struct attribute *mci_attrs[] = { &dev_attr_reset_counters.attr, @@ -878,7 +875,6 @@ static struct attribute *mci_attrs[] = { &dev_attr_ce_noinfo_count.attr, &dev_attr_ue_count.attr, &dev_attr_ce_count.attr, - &dev_attr_sdram_scrub_rate.attr, &dev_attr_max_location.attr, NULL }; @@ -1007,11 +1003,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); bus_unregister(&mci->bus); kfree(mci->bus.name); return err; } + if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { + if (mci->get_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; + dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; + } + if (mci->set_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; + dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; + } + err = device_create_file(&mci->dev, + &dev_attr_sdram_scrub_rate); + if (err) { + edac_dbg(1, "failure: create sdram_scrub_rate\n"); + goto fail2; + } + } /* * Create the dimm/rank devices */ @@ -1056,6 +1069,7 @@ fail: continue; device_unregister(&dimm->dev); } +fail2: device_unregister(&mci->dev); bus_unregister(&mci->bus); kfree(mci->bus.name); diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 12c951a..a66941f 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -146,7 +146,7 @@ static void __exit edac_exit(void) /* * Inform the kernel of our entry and exit points */ -module_init(edac_init); +subsys_initcall(edac_init); module_exit(edac_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 0056c4d..e8658e4 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); + edac_put_sysfs_subsys(); } - edac_put_sysfs_subsys(); } /* diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c new file mode 100644 index 0000000..bb53467 --- /dev/null +++ b/drivers/edac/ghes_edac.c @@ -0,0 +1,537 @@ +/* + * GHES/EDAC Linux driver + * + * This file may be distributed under the terms of the GNU General Public + * License version 2. + * + * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> + * + * Red Hat Inc. http://www.redhat.com + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <acpi/ghes.h> +#include <linux/edac.h> +#include <linux/dmi.h> +#include "edac_core.h" +#include <ras/ras_event.h> + +#define GHES_EDAC_REVISION " Ver: 1.0.0" + +struct ghes_edac_pvt { + struct list_head list; + struct ghes *ghes; + struct mem_ctl_info *mci; + + /* Buffers for the error handling routine */ + char detail_location[240]; + char other_detail[160]; + char msg[80]; +}; + +static LIST_HEAD(ghes_reglist); +static DEFINE_MUTEX(ghes_edac_lock); +static int ghes_edac_mc_num; + + +/* Memory Device - Type 17 of SMBIOS spec */ +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form_factor; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + +struct ghes_edac_dimm_fill { + struct mem_ctl_info *mci; + unsigned count; +}; + +char *memory_type[] = { + [MEM_EMPTY] = "EMPTY", + [MEM_RESERVED] = "RESERVED", + [MEM_UNKNOWN] = "UNKNOWN", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "SDR", + [MEM_RDR] = "RDR", + [MEM_DDR] = "DDR", + [MEM_RDDR] = "RDDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "DDR2", + [MEM_FB_DDR2] = "FB_DDR2", + [MEM_RDDR2] = "RDDR2", + [MEM_XDR] = "XDR", + [MEM_DDR3] = "DDR3", + [MEM_RDDR3] = "RDDR3", +}; + +static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) +{ + int *num_dimm = arg; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) + (*num_dimm)++; +} + +static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) +{ + struct ghes_edac_dimm_fill *dimm_fill = arg; + struct mem_ctl_info *mci = dimm_fill->mci; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, + dimm_fill->count, 0, 0); + + if (entry->size == 0xffff) { + pr_info("Can't get DIMM%i size\n", + dimm_fill->count); + dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ + } else if (entry->size == 0x7fff) { + dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); + } else { + if (entry->size & 1 << 15) + dimm->nr_pages = MiB_TO_PAGES((entry->size & + 0x7fff) << 10); + else + dimm->nr_pages = MiB_TO_PAGES(entry->size); + } + + switch (entry->memory_type) { + case 0x12: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR; + else + dimm->mtype = MEM_DDR; + break; + case 0x13: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR2; + else + dimm->mtype = MEM_DDR2; + break; + case 0x14: + dimm->mtype = MEM_FB_DDR2; + break; + case 0x18: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR3; + else + dimm->mtype = MEM_DDR3; + break; + default: + if (entry->type_detail & 1 << 6) + dimm->mtype = MEM_RMBS; + else if ((entry->type_detail & ((1 << 7) | (1 << 13))) + == ((1 << 7) | (1 << 13))) + dimm->mtype = MEM_RDR; + else if (entry->type_detail & 1 << 7) + dimm->mtype = MEM_SDR; + else if (entry->type_detail & 1 << 9) + dimm->mtype = MEM_EDO; + else + dimm->mtype = MEM_UNKNOWN; + } + + /* + * Actually, we can only detect if the memory has bits for + * checksum or not + */ + if (entry->total_width == entry->data_width) + dimm->edac_mode = EDAC_NONE; + else + dimm->edac_mode = EDAC_SECDED; + + dimm->dtype = DEV_UNKNOWN; + dimm->grain = 128; /* Likely, worse case */ + + /* + * FIXME: It shouldn't be hard to also fill the DIMM labels + */ + + if (dimm->nr_pages) { + edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", + dimm_fill->count, memory_type[dimm->mtype], + PAGES_TO_MiB(dimm->nr_pages), + (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); + edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", + entry->memory_type, entry->type_detail, + entry->total_width, entry->data_width); + } + + dimm_fill->count++; + } +} + +void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err) +{ + enum hw_event_mc_err_type type; + struct edac_raw_error_desc *e; + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt = NULL; + char *p; + u8 grain_bits; + + list_for_each_entry(pvt, &ghes_reglist, list) { + if (ghes == pvt->ghes) + break; + } + if (!pvt) { + pr_err("Internal error: Can't find EDAC structure\n"); + return; + } + mci = pvt->mci; + e = &mci->error_desc; + + /* Cleans the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = 1; + strcpy(e->label, "unknown label"); + e->msg = pvt->msg; + e->other_detail = pvt->other_detail; + e->top_layer = -1; + e->mid_layer = -1; + e->low_layer = -1; + *pvt->other_detail = '\0'; + *pvt->msg = '\0'; + + switch (sev) { + case GHES_SEV_CORRECTED: + type = HW_EVENT_ERR_CORRECTED; + break; + case GHES_SEV_RECOVERABLE: + type = HW_EVENT_ERR_UNCORRECTED; + break; + case GHES_SEV_PANIC: + type = HW_EVENT_ERR_FATAL; + break; + default: + case GHES_SEV_NO: + type = HW_EVENT_ERR_INFO; + } + + edac_dbg(1, "error validation_bits: 0x%08llx\n", + (long long)mem_err->validation_bits); + + /* Error type, mapped on e->msg */ + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + p = pvt->msg; + switch (mem_err->error_type) { + case 0: + p += sprintf(p, "Unknown"); + break; + case 1: + p += sprintf(p, "No error"); + break; + case 2: + p += sprintf(p, "Single-bit ECC"); + break; + case 3: + p += sprintf(p, "Multi-bit ECC"); + break; + case 4: + p += sprintf(p, "Single-symbol ChipKill ECC"); + break; + case 5: + p += sprintf(p, "Multi-symbol ChipKill ECC"); + break; + case 6: + p += sprintf(p, "Master abort"); + break; + case 7: + p += sprintf(p, "Target abort"); + break; + case 8: + p += sprintf(p, "Parity Error"); + break; + case 9: + p += sprintf(p, "Watchdog timeout"); + break; + case 10: + p += sprintf(p, "Invalid address"); + break; + case 11: + p += sprintf(p, "Mirror Broken"); + break; + case 12: + p += sprintf(p, "Memory Sparing"); + break; + case 13: + p += sprintf(p, "Scrub corrected error"); + break; + case 14: + p += sprintf(p, "Scrub uncorrected error"); + break; + case 15: + p += sprintf(p, "Physical Memory Map-out event"); + break; + default: + p += sprintf(p, "reserved error (%d)", + mem_err->error_type); + } + } else { + strcpy(pvt->msg, "unknown error"); + } + + /* Error address */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; + e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + } + + /* Error grain */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { + e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + } + + /* Memory error location, mapped on e->location */ + p = e->location; + if (mem_err->validation_bits & CPER_MEM_VALID_NODE) + p += sprintf(p, "node:%d ", mem_err->node); + if (mem_err->validation_bits & CPER_MEM_VALID_CARD) + p += sprintf(p, "card:%d ", mem_err->card); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) + p += sprintf(p, "module:%d ", mem_err->module); + if (mem_err->validation_bits & CPER_MEM_VALID_BANK) + p += sprintf(p, "bank:%d ", mem_err->bank); + if (mem_err->validation_bits & CPER_MEM_VALID_ROW) + p += sprintf(p, "row:%d ", mem_err->row); + if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) + p += sprintf(p, "col:%d ", mem_err->column); + if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) + p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + if (p > e->location) + *(p - 1) = '\0'; + + /* All other fields are mapped on e->other_detail */ + p = pvt->other_detail; + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { + u64 status = mem_err->error_status; + + p += sprintf(p, "status(0x%016llx): ", (long long)status); + switch ((status >> 8) & 0xff) { + case 1: + p += sprintf(p, "Error detected internal to the component "); + break; + case 16: + p += sprintf(p, "Error detected in the bus "); + break; + case 4: + p += sprintf(p, "Storage error in DRAM memory "); + break; + case 5: + p += sprintf(p, "Storage error in TLB "); + break; + case 6: + p += sprintf(p, "Storage error in cache "); + break; + case 7: + p += sprintf(p, "Error in one or more functional units "); + break; + case 8: + p += sprintf(p, "component failed self test "); + break; + case 9: + p += sprintf(p, "Overflow or undervalue of internal queue "); + break; + case 17: + p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); + break; + case 18: + p += sprintf(p, "Improper access error "); + break; + case 19: + p += sprintf(p, "Access to a memory address which is not mapped to any component "); + break; + case 20: + p += sprintf(p, "Loss of Lockstep "); + break; + case 21: + p += sprintf(p, "Response not associated with a request "); + break; + case 22: + p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); + break; + case 23: + p += sprintf(p, "Detection of a PATH_ERROR "); + break; + case 25: + p += sprintf(p, "Bus operation timeout "); + break; + case 26: + p += sprintf(p, "A read was issued to data that has been poisoned "); + break; + default: + p += sprintf(p, "reserved "); + break; + } + } + if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + p += sprintf(p, "requestorID: 0x%016llx ", + (long long)mem_err->requestor_id); + if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + p += sprintf(p, "responderID: 0x%016llx ", + (long long)mem_err->responder_id); + if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) + p += sprintf(p, "targetID: 0x%016llx ", + (long long)mem_err->responder_id); + if (p > pvt->other_detail) + *(p - 1) = '\0'; + + /* Generate the trace event */ + grain_bits = fls_long(e->grain); + sprintf(pvt->detail_location, "APEI location: %s %s", + e->location, e->other_detail); + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, pvt->detail_location); + + /* Report the error via EDAC API */ + edac_raw_mc_handle_error(type, mci, e); +} +EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); + +int ghes_edac_register(struct ghes *ghes, struct device *dev) +{ + bool fake = false; + int rc, num_dimm = 0; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + struct ghes_edac_pvt *pvt; + struct ghes_edac_dimm_fill dimm_fill; + + /* Get the number of DIMMs */ + dmi_walk(ghes_edac_count_dimms, &num_dimm); + + /* Check if we've got a bogus BIOS */ + if (num_dimm == 0) { + fake = true; + num_dimm = 1; + } + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = num_dimm; + layers[0].is_virt_csrow = true; + + /* + * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), + * to avoid duplicated memory controller numbers + */ + mutex_lock(&ghes_edac_lock); + mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (!mci) { + pr_info("Can't allocate memory for EDAC data\n"); + mutex_unlock(&ghes_edac_lock); + return -ENOMEM; + } + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + list_add_tail(&pvt->list, &ghes_reglist); + pvt->ghes = ghes; + pvt->mci = mci; + mci->pdev = dev; + + mci->mtype_cap = MEM_FLAG_EMPTY; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "ghes_edac.c"; + mci->mod_ver = GHES_EDAC_REVISION; + mci->ctl_name = "ghes_edac"; + mci->dev_name = "ghes"; + + if (!ghes_edac_mc_num) { + if (!fake) { + pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); + pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); + pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); + pr_info("If you find incorrect reports, please contact your hardware vendor\n"); + pr_info("to correct its BIOS.\n"); + pr_info("This system has %d DIMM sockets.\n", + num_dimm); + } else { + pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); + pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); + pr_info("work on such system. Use this driver with caution\n"); + } + } + + if (!fake) { + /* + * Fill DIMM info from DMI for the memory controller #0 + * + * Keep it in blank for the other memory controllers, as + * there's no reliable way to properly credit each DIMM to + * the memory controller, as different BIOSes fill the + * DMI bank location fields on different ways + */ + if (!ghes_edac_mc_num) { + dimm_fill.count = 0; + dimm_fill.mci = mci; + dmi_walk(ghes_edac_dmidecode, &dimm_fill); + } + } else { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, 0, 0, 0); + + dimm->nr_pages = 1; + dimm->grain = 128; + dimm->mtype = MEM_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_SECDED; + } + + rc = edac_mc_add_mc(mci); + if (rc < 0) { + pr_info("Can't register at EDAC core\n"); + edac_mc_free(mci); + mutex_unlock(&ghes_edac_lock); + return -ENODEV; + } + + ghes_edac_mc_num++; + mutex_unlock(&ghes_edac_lock); + return 0; +} +EXPORT_SYMBOL_GPL(ghes_edac_register); + +void ghes_edac_unregister(struct ghes *ghes) +{ + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt, *tmp; + + list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { + if (ghes == pvt->ghes) { + mci = pvt->mci; + edac_mc_del_mc(mci->pdev); + edac_mc_free(mci); + list_del(&pvt->list); + } + } +} +EXPORT_SYMBOL_GPL(ghes_edac_unregister); diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 4e83376..aa44c17 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -106,16 +106,26 @@ static int nr_channels; static int how_many_channels(struct pci_dev *pdev) { + int n_channels; + unsigned char capid0_8b; /* 8th byte of CAPID0 */ pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ edac_dbg(0, "In single channel mode\n"); - return 1; + n_channels = 1; } else { edac_dbg(0, "In dual channel mode\n"); - return 2; + n_channels = 2; } + + if (capid0_8b & 0x10) /* check if both channels are filled */ + edac_dbg(0, "2 DIMMS per channel disabled\n"); + else + edac_dbg(0, "2 DIMMS per channel enabled\n"); + + return n_channels; } static unsigned long eccerrlog_syndrome(u64 log) @@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window, for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + + edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]); } } @@ -311,6 +323,9 @@ static unsigned long drb_to_nr_pages( int n; n = drbs[channel][rank]; + if (!n) + return 0; + if (rank > 0) n -= drbs[channel][rank - 1]; if (stacked && (channel == 1) && @@ -377,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) * cumulative; the last one will contain the total memory * contained in all ranks. */ - for (i = 0; i < mci->nr_csrows; i++) { + for (i = 0; i < I3200_DIMMS; i++) { unsigned long nr_pages; - struct csrow_info *csrow = mci->csrows[i]; - nr_pages = drb_to_nr_pages(drbs, stacked, - i / I3200_RANKS_PER_CHANNEL, - i % I3200_RANKS_PER_CHANNEL); + for (j = 0; j < nr_channels; j++) { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); - if (nr_pages == 0) - continue; + nr_pages = drb_to_nr_pages(drbs, stacked, j, i); + if (nr_pages == 0) + continue; - for (j = 0; j < nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; dimm->grain = nr_pages << PAGE_SHIFT; diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index d6955b2..1b63517 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -27,6 +27,7 @@ #include <linux/edac.h> #include <linux/delay.h> #include <linux/mmzone.h> +#include <linux/debugfs.h> #include "edac_core.h" @@ -68,6 +69,14 @@ I5100_FERR_NF_MEM_M1ERR_MASK) #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ +#define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */ +#define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */ +#define I5100_MEMXEINJMSK0_EINJEN (1 << 27) +#define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */ +#define I5100_MEM1EINJMSK1 0x206 /* Injection Mask1 Register Channel 1 */ + +/* Device 19, Function 0 */ +#define I5100_DINJ0 0x9a /* device 21 and 22, func 0 */ #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ @@ -338,13 +347,26 @@ struct i5100_priv { unsigned ranksperchan; /* number of ranks per channel */ struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *einj; /* device 19 func 0 */ struct pci_dev *ch0mm; /* device 21 func 0 */ struct pci_dev *ch1mm; /* device 22 func 0 */ struct delayed_work i5100_scrubbing; int scrub_enable; + + /* Error injection */ + u8 inject_channel; + u8 inject_hlinesel; + u8 inject_deviceptr1; + u8 inject_deviceptr2; + u16 inject_eccmask1; + u16 inject_eccmask2; + + struct dentry *debugfs; }; +static struct dentry *i5100_debugfs; + /* map a rank/chan to a slot number on the mainboard */ static int i5100_rank_to_slot(const struct mem_ctl_info *mci, int chan, int rank) @@ -863,13 +885,126 @@ static void i5100_init_csrows(struct mem_ctl_info *mci) } } +/**************************************************************************** + * Error injection routines + ****************************************************************************/ + +static void i5100_do_inject(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 mask0; + u16 mask1; + + /* MEM[1:0]EINJMSK0 + * 31 - ADDRMATCHEN + * 29:28 - HLINESEL + * 00 Reserved + * 01 Lower half of cache line + * 10 Upper half of cache line + * 11 Both upper and lower parts of cache line + * 27 - EINJEN + * 25:19 - XORMASK1 for deviceptr1 + * 9:5 - SEC2RAM for deviceptr2 + * 4:0 - FIR2RAM for deviceptr1 + */ + mask0 = ((priv->inject_hlinesel & 0x3) << 28) | + I5100_MEMXEINJMSK0_EINJEN | + ((priv->inject_eccmask1 & 0xffff) << 10) | + ((priv->inject_deviceptr2 & 0x1f) << 5) | + (priv->inject_deviceptr1 & 0x1f); + + /* MEM[1:0]EINJMSK1 + * 15:0 - XORMASK2 for deviceptr2 + */ + mask1 = priv->inject_eccmask2; + + if (priv->inject_channel == 0) { + pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1); + } else { + pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1); + } + + /* Error Injection Response Function + * Intel 5100 Memory Controller Hub Chipset (318378) datasheet + * hints about this register but carry no data about them. All + * data regarding device 19 is based on experimentation and the + * Intel 7300 Chipset Memory Controller Hub (318082) datasheet + * which appears to be accurate for the i5100 in this area. + * + * The injection code don't work without setting this register. + * The register needs to be flipped off then on else the hardware + * will only preform the first injection. + * + * Stop condition bits 7:4 + * 1010 - Stop after one injection + * 1011 - Never stop injecting faults + * + * Start condition bits 3:0 + * 1010 - Never start + * 1011 - Start immediately + */ + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa); + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) +static ssize_t inject_enable_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + + i5100_do_inject(mci); + + return count; +} + +static const struct file_operations i5100_inject_enable_fops = { + .open = simple_open, + .write = inject_enable_write, + .llseek = generic_file_llseek, +}; + +static int i5100_setup_debugfs(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + + if (!i5100_debugfs) + return -ENODEV; + + priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + + if (!priv->debugfs) + return -ENOMEM; + + debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_channel); + debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_hlinesel); + debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr1); + debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr2); + debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask1); + debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask2); + debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, + &mci->dev, &i5100_inject_enable_fops); + + return 0; + +} + static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct i5100_priv *priv; - struct pci_dev *ch0mm, *ch1mm; + struct pci_dev *ch0mm, *ch1mm, *einj; int ret = 0; u32 dw; int ranksperch; @@ -941,6 +1076,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_disable_ch1; } + + /* device 19, func 0, Error injection */ + einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_19, 0); + if (!einj) { + ret = -ENODEV; + goto bail_einj; + } + + rc = pci_enable_device(einj); + if (rc < 0) { + ret = rc; + goto bail_disable_einj; + } + + mci->pdev = &pdev->dev; priv = mci->pvt_info; @@ -948,6 +1099,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) priv->mc = pdev; priv->ch0mm = ch0mm; priv->ch1mm = ch1mm; + priv->einj = einj; INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); @@ -975,6 +1127,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mci->set_sdram_scrub_rate = i5100_set_scrub_rate; mci->get_sdram_scrub_rate = i5100_get_scrub_rate; + priv->inject_channel = 0; + priv->inject_hlinesel = 0; + priv->inject_deviceptr1 = 0; + priv->inject_deviceptr2 = 0; + priv->inject_eccmask1 = 0; + priv->inject_eccmask2 = 0; + i5100_init_csrows(mci); /* this strange construction seems to be in every driver, dunno why */ @@ -992,6 +1151,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_scrub; } + i5100_setup_debugfs(mci); + return ret; bail_scrub: @@ -999,6 +1160,12 @@ bail_scrub: cancel_delayed_work_sync(&(priv->i5100_scrubbing)); edac_mc_free(mci); +bail_disable_einj: + pci_disable_device(einj); + +bail_einj: + pci_dev_put(einj); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1030,14 +1197,18 @@ static void i5100_remove_one(struct pci_dev *pdev) priv = mci->pvt_info; + debugfs_remove_recursive(priv->debugfs); + priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); pci_disable_device(pdev); pci_disable_device(priv->ch0mm); pci_disable_device(priv->ch1mm); + pci_disable_device(priv->einj); pci_dev_put(priv->ch0mm); pci_dev_put(priv->ch1mm); + pci_dev_put(priv->einj); edac_mc_free(mci); } @@ -1060,13 +1231,16 @@ static int __init i5100_init(void) { int pci_rc; - pci_rc = pci_register_driver(&i5100_driver); + i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); + pci_rc = pci_register_driver(&i5100_driver); return (pci_rc < 0) ? pci_rc : 0; } static void __exit i5100_exit(void) { + debugfs_remove(i5100_debugfs); + pci_unregister_driver(&i5100_driver); } diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index e213d03..0ec3e95 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms) static inline int numrank(u32 rank) { - static int ranks[4] = { 1, 2, 4, -EINVAL }; + static const int ranks[] = { 1, 2, 4, -EINVAL }; return ranks[rank & 0x3]; } static inline int numbank(u32 bank) { - static int banks[4] = { 4, 8, 16, -EINVAL }; + static const int banks[] = { 4, 8, 16, -EINVAL }; return banks[bank & 0x3]; } static inline int numrow(u32 row) { - static int rows[8] = { + static const int rows[] = { 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, -EINVAL, -EINVAL, -EINVAL, }; @@ -444,7 +444,7 @@ static inline int numrow(u32 row) static inline int numcol(u32 col) { - static int cols[8] = { + static const int cols[] = { 1 << 10, 1 << 11, 1 << 12, -EINVAL, }; return cols[col & 0x3]; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index da7e298..57244f9 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (1 + pvt->tohm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index f8d2287..27ac423 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -487,27 +487,28 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg) static int add_client_resource(struct client *client, struct client_resource *resource, gfp_t gfp_mask) { + bool preload = gfp_mask & __GFP_WAIT; unsigned long flags; int ret; - retry: - if (idr_pre_get(&client->resource_idr, gfp_mask) == 0) - return -ENOMEM; - + if (preload) + idr_preload(gfp_mask); spin_lock_irqsave(&client->lock, flags); + if (client->in_shutdown) ret = -ECANCELED; else - ret = idr_get_new(&client->resource_idr, resource, - &resource->handle); + ret = idr_alloc(&client->resource_idr, resource, 0, 0, + GFP_NOWAIT); if (ret >= 0) { + resource->handle = ret; client_get(client); schedule_if_iso_resource(resource); } - spin_unlock_irqrestore(&client->lock, flags); - if (ret == -EAGAIN) - goto retry; + spin_unlock_irqrestore(&client->lock, flags); + if (preload) + idr_preload_end(); return ret < 0 ? ret : 0; } @@ -1779,7 +1780,6 @@ static int fw_device_op_release(struct inode *inode, struct file *file) wait_event(client->tx_flush_wait, !has_outbound_transactions(client)); idr_for_each(&client->resource_idr, shutdown_resource, client); - idr_remove_all(&client->resource_idr); idr_destroy(&client->resource_idr); list_for_each_entry_safe(event, next_event, &client->event_list, link) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 3873d53..03ce7d9 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -1017,12 +1017,11 @@ static void fw_device_init(struct work_struct *work) fw_device_get(device); down_write(&fw_device_rwsem); - ret = idr_pre_get(&fw_device_idr, GFP_KERNEL) ? - idr_get_new(&fw_device_idr, device, &minor) : - -ENOMEM; + minor = idr_alloc(&fw_device_idr, device, 0, 1 << MINORBITS, + GFP_KERNEL); up_write(&fw_device_rwsem); - if (ret < 0) + if (minor < 0) goto error; device->device.bus = &fw_bus_type; diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index fed08b6..7320bf8 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -79,6 +79,7 @@ #include <linux/device.h> #include <linux/slab.h> #include <linux/pstore.h> +#include <linux/ctype.h> #include <linux/fs.h> #include <linux/ramfs.h> @@ -908,6 +909,48 @@ static struct inode *efivarfs_get_inode(struct super_block *sb, return inode; } +/* + * Return true if 'str' is a valid efivarfs filename of the form, + * + * VariableName-12345678-1234-1234-1234-1234567891bc + */ +static bool efivarfs_valid_name(const char *str, int len) +{ + static const char dashes[GUID_LEN] = { + [8] = 1, [13] = 1, [18] = 1, [23] = 1 + }; + const char *s = str + len - GUID_LEN; + int i; + + /* + * We need a GUID, plus at least one letter for the variable name, + * plus the '-' separator + */ + if (len < GUID_LEN + 2) + return false; + + /* GUID should be right after the first '-' */ + if (s - 1 != strchr(str, '-')) + return false; + + /* + * Validate that 's' is of the correct format, e.g. + * + * 12345678-1234-1234-1234-123456789abc + */ + for (i = 0; i < GUID_LEN; i++) { + if (dashes[i]) { + if (*s++ != '-') + return false; + } else { + if (!isxdigit(*s++)) + return false; + } + } + + return true; +} + static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid) { guid->b[0] = hex_to_bin(str[6]) << 4 | hex_to_bin(str[7]); @@ -936,11 +979,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, struct efivar_entry *var; int namelen, i = 0, err = 0; - /* - * We need a GUID, plus at least one letter for the variable name, - * plus the '-' separator - */ - if (dentry->d_name.len < GUID_LEN + 2) + if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len)) return -EINVAL; inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0); @@ -1012,6 +1051,84 @@ static int efivarfs_unlink(struct inode *dir, struct dentry *dentry) return -EINVAL; }; +/* + * Compare two efivarfs file names. + * + * An efivarfs filename is composed of two parts, + * + * 1. A case-sensitive variable name + * 2. A case-insensitive GUID + * + * So we need to perform a case-sensitive match on part 1 and a + * case-insensitive match on part 2. + */ +static int efivarfs_d_compare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, + const struct qstr *name) +{ + int guid = len - GUID_LEN; + + if (name->len != len) + return 1; + + /* Case-sensitive compare for the variable name */ + if (memcmp(str, name->name, guid)) + return 1; + + /* Case-insensitive compare for the GUID */ + return strncasecmp(name->name + guid, str + guid, GUID_LEN); +} + +static int efivarfs_d_hash(const struct dentry *dentry, + const struct inode *inode, struct qstr *qstr) +{ + unsigned long hash = init_name_hash(); + const unsigned char *s = qstr->name; + unsigned int len = qstr->len; + + if (!efivarfs_valid_name(s, len)) + return -EINVAL; + + while (len-- > GUID_LEN) + hash = partial_name_hash(*s++, hash); + + /* GUID is case-insensitive. */ + while (len--) + hash = partial_name_hash(tolower(*s++), hash); + + qstr->hash = end_name_hash(hash); + return 0; +} + +/* + * Retaining negative dentries for an in-memory filesystem just wastes + * memory and lookup time: arrange for them to be deleted immediately. + */ +static int efivarfs_delete_dentry(const struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations efivarfs_d_ops = { + .d_compare = efivarfs_d_compare, + .d_hash = efivarfs_d_hash, + .d_delete = efivarfs_delete_dentry, +}; + +static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) +{ + struct qstr q; + + q.name = name; + q.len = strlen(name); + + if (efivarfs_d_hash(NULL, NULL, &q)) + return NULL; + + return d_alloc(parent, &q); +} + static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode = NULL; @@ -1027,6 +1144,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = EFIVARFS_MAGIC; sb->s_op = &efivarfs_ops; + sb->s_d_op = &efivarfs_d_ops; sb->s_time_gran = 1; inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0); @@ -1067,7 +1185,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) if (!inode) goto fail_name; - dentry = d_alloc_name(root, name); + dentry = efivarfs_alloc_dentry(root, name); if (!dentry) goto fail_inode; @@ -1084,7 +1202,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) mutex_lock(&inode->i_mutex); inode->i_private = entry; - i_size_write(inode, size+4); + i_size_write(inode, size + sizeof(entry->var.Attributes)); mutex_unlock(&inode->i_mutex); d_add(dentry, inode); } @@ -1117,8 +1235,20 @@ static struct file_system_type efivarfs_type = { .kill_sb = efivarfs_kill_sb, }; +/* + * Handle negative dentry. + */ +static struct dentry *efivarfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_add(dentry, NULL); + return NULL; +} + static const struct inode_operations efivarfs_dir_inode_operations = { - .lookup = simple_lookup, + .lookup = efivarfs_lookup, .unlink = efivarfs_unlink, .create = efivarfs_create, }; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4828fe7..fff9786 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -411,15 +411,10 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev, goto err_out; } - do { - ret = -ENOMEM; - if (idr_pre_get(&dirent_idr, GFP_KERNEL)) - ret = idr_get_new_above(&dirent_idr, value_sd, - 1, &id); - } while (ret == -EAGAIN); - - if (ret) + ret = idr_alloc(&dirent_idr, value_sd, 1, 0, GFP_KERNEL); + if (ret < 0) goto free_sd; + id = ret; desc->flags &= GPIO_FLAGS_MASK; desc->flags |= (unsigned long)id << ID_SHIFT; diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 45adf97..725968d 100644 --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -74,24 +74,13 @@ void drm_ctxbitmap_free(struct drm_device * dev, int ctx_handle) */ static int drm_ctxbitmap_next(struct drm_device * dev) { - int new_id; int ret; -again: - if (idr_pre_get(&dev->ctx_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Out of memory expanding drawable idr\n"); - return -ENOMEM; - } mutex_lock(&dev->struct_mutex); - ret = idr_get_new_above(&dev->ctx_idr, NULL, - DRM_RESERVED_CONTEXTS, &new_id); + ret = idr_alloc(&dev->ctx_idr, NULL, DRM_RESERVED_CONTEXTS, 0, + GFP_KERNEL); mutex_unlock(&dev->struct_mutex); - if (ret == -EAGAIN) - goto again; - else if (ret) - return ret; - - return new_id; + return ret; } /** @@ -118,7 +107,7 @@ int drm_ctxbitmap_init(struct drm_device * dev) void drm_ctxbitmap_cleanup(struct drm_device * dev) { mutex_lock(&dev->struct_mutex); - idr_remove_all(&dev->ctx_idr); + idr_destroy(&dev->ctx_idr); mutex_unlock(&dev->struct_mutex); } diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 3bdf2a6..792c3e3 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -266,32 +266,21 @@ char *drm_get_connector_status_name(enum drm_connector_status status) static int drm_mode_object_get(struct drm_device *dev, struct drm_mode_object *obj, uint32_t obj_type) { - int new_id = 0; int ret; -again: - if (idr_pre_get(&dev->mode_config.crtc_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Ran out memory getting a mode number\n"); - return -ENOMEM; - } - mutex_lock(&dev->mode_config.idr_mutex); - ret = idr_get_new_above(&dev->mode_config.crtc_idr, obj, 1, &new_id); - - if (!ret) { + ret = idr_alloc(&dev->mode_config.crtc_idr, obj, 1, 0, GFP_KERNEL); + if (ret >= 0) { /* * Set up the object linking under the protection of the idr * lock so that other users can't see inconsistent state. */ - obj->id = new_id; + obj->id = ret; obj->type = obj_type; } mutex_unlock(&dev->mode_config.idr_mutex); - if (ret == -EAGAIN) - goto again; - - return ret; + return ret < 0 ? ret : 0; } /** @@ -1272,7 +1261,6 @@ void drm_mode_config_cleanup(struct drm_device *dev) crtc->funcs->destroy(crtc); } - idr_remove_all(&dev->mode_config.crtc_idr); idr_destroy(&dev->mode_config.crtc_idr); } EXPORT_SYMBOL(drm_mode_config_cleanup); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index be174ca..25f91cd 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -297,7 +297,6 @@ static void __exit drm_core_exit(void) unregister_chrdev(DRM_MAJOR, "drm"); - idr_remove_all(&drm_minors_idr); idr_destroy(&drm_minors_idr); } diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 24efae4..af779ae 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -270,21 +270,19 @@ drm_gem_handle_create(struct drm_file *file_priv, int ret; /* - * Get the user-visible handle using idr. + * Get the user-visible handle using idr. Preload and perform + * allocation under our spinlock. */ -again: - /* ensure there is space available to allocate a handle */ - if (idr_pre_get(&file_priv->object_idr, GFP_KERNEL) == 0) - return -ENOMEM; - - /* do the allocation under our spinlock */ + idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_get_new_above(&file_priv->object_idr, obj, 1, (int *)handlep); + + ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); + spin_unlock(&file_priv->table_lock); - if (ret == -EAGAIN) - goto again; - else if (ret) + idr_preload_end(); + if (ret < 0) return ret; + *handlep = ret; drm_gem_object_handle_reference(obj); @@ -451,29 +449,25 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data, if (obj == NULL) return -ENOENT; -again: - if (idr_pre_get(&dev->object_name_idr, GFP_KERNEL) == 0) { - ret = -ENOMEM; - goto err; - } - + idr_preload(GFP_KERNEL); spin_lock(&dev->object_name_lock); if (!obj->name) { - ret = idr_get_new_above(&dev->object_name_idr, obj, 1, - &obj->name); + ret = idr_alloc(&dev->object_name_idr, obj, 1, 0, GFP_NOWAIT); + obj->name = ret; args->name = (uint64_t) obj->name; spin_unlock(&dev->object_name_lock); + idr_preload_end(); - if (ret == -EAGAIN) - goto again; - else if (ret) + if (ret < 0) goto err; + ret = 0; /* Allocate a reference for the name table. */ drm_gem_object_reference(obj); } else { args->name = (uint64_t) obj->name; spin_unlock(&dev->object_name_lock); + idr_preload_end(); ret = 0; } @@ -561,8 +555,6 @@ drm_gem_release(struct drm_device *dev, struct drm_file *file_private) { idr_for_each(&file_private->object_idr, &drm_gem_object_release_handle, file_private); - - idr_remove_all(&file_private->object_idr); idr_destroy(&file_private->object_idr); } diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c index 8025454..7e4bae7 100644 --- a/drivers/gpu/drm/drm_hashtab.c +++ b/drivers/gpu/drm/drm_hashtab.c @@ -60,14 +60,13 @@ void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key) { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; int count = 0; hashed_key = hash_long(key, ht->order); DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key); h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, list, h_list, head) + hlist_for_each_entry(entry, h_list, head) DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key); } @@ -76,14 +75,13 @@ static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht, { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, list, h_list, head) { + hlist_for_each_entry(entry, h_list, head) { if (entry->key == key) - return list; + return &entry->head; if (entry->key > key) break; } @@ -95,14 +93,13 @@ static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht, { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; - hlist_for_each_entry_rcu(entry, list, h_list, head) { + hlist_for_each_entry_rcu(entry, h_list, head) { if (entry->key == key) - return list; + return &entry->head; if (entry->key > key) break; } @@ -113,19 +110,19 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list, *parent; + struct hlist_node *parent; unsigned int hashed_key; unsigned long key = item->key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; parent = NULL; - hlist_for_each_entry(entry, list, h_list, head) { + hlist_for_each_entry(entry, h_list, head) { if (entry->key == key) return -EINVAL; if (entry->key > key) break; - parent = list; + parent = &entry->head; } if (parent) { hlist_add_after_rcu(parent, &item->head); diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index 200e104..7d30802 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -109,7 +109,6 @@ EXPORT_SYMBOL(drm_ut_debug_printk); static int drm_minor_get_id(struct drm_device *dev, int type) { - int new_id; int ret; int base = 0, limit = 63; @@ -121,25 +120,11 @@ static int drm_minor_get_id(struct drm_device *dev, int type) limit = base + 255; } -again: - if (idr_pre_get(&drm_minors_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Out of memory expanding drawable idr\n"); - return -ENOMEM; - } mutex_lock(&dev->struct_mutex); - ret = idr_get_new_above(&drm_minors_idr, NULL, - base, &new_id); + ret = idr_alloc(&drm_minors_idr, NULL, base, limit, GFP_KERNEL); mutex_unlock(&dev->struct_mutex); - if (ret == -EAGAIN) - goto again; - else if (ret) - return ret; - if (new_id >= limit) { - idr_remove(&drm_minors_idr, new_id); - return -EINVAL; - } - return new_id; + return ret == -ENOSPC ? -EINVAL : ret; } struct drm_master *drm_master_create(struct drm_minor *minor) diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index 1a55635..1adce07 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -137,21 +137,15 @@ static int ipp_create_id(struct idr *id_idr, struct mutex *lock, void *obj, DRM_DEBUG_KMS("%s\n", __func__); -again: - /* ensure there is space available to allocate a handle */ - if (idr_pre_get(id_idr, GFP_KERNEL) == 0) { - DRM_ERROR("failed to get idr.\n"); - return -ENOMEM; - } - /* do the allocation under our mutexlock */ mutex_lock(lock); - ret = idr_get_new_above(id_idr, obj, 1, (int *)idp); + ret = idr_alloc(id_idr, obj, 1, 0, GFP_KERNEL); mutex_unlock(lock); - if (ret == -EAGAIN) - goto again; + if (ret < 0) + return ret; - return ret; + *idp = ret; + return 0; } static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id) @@ -1786,8 +1780,6 @@ err_iommu: drm_iommu_detach_device(drm_dev, ippdrv->dev); err_idr: - idr_remove_all(&ctx->ipp_idr); - idr_remove_all(&ctx->prop_idr); idr_destroy(&ctx->ipp_idr); idr_destroy(&ctx->prop_idr); return ret; @@ -1965,8 +1957,6 @@ static int ipp_remove(struct platform_device *pdev) exynos_drm_subdrv_unregister(&ctx->subdrv); /* remove,destroy ipp idr */ - idr_remove_all(&ctx->ipp_idr); - idr_remove_all(&ctx->prop_idr); idr_destroy(&ctx->ipp_idr); idr_destroy(&ctx->prop_idr); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 21177d9..94d873a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -139,7 +139,7 @@ create_hw_context(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *ctx; - int ret, id; + int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx == NULL) @@ -164,22 +164,11 @@ create_hw_context(struct drm_device *dev, ctx->file_priv = file_priv; -again: - if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) { - ret = -ENOMEM; - DRM_DEBUG_DRIVER("idr allocation failed\n"); - goto err_out; - } - - ret = idr_get_new_above(&file_priv->context_idr, ctx, - DEFAULT_CONTEXT_ID + 1, &id); - if (ret == 0) - ctx->id = id; - - if (ret == -EAGAIN) - goto again; - else if (ret) + ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID + 1, 0, + GFP_KERNEL); + if (ret < 0) goto err_out; + ctx->id = ret; return ctx; diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 841065b..5a5325e 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -58,7 +58,6 @@ static int sis_driver_unload(struct drm_device *dev) { drm_sis_private_t *dev_priv = dev->dev_private; - idr_remove_all(&dev_priv->object_idr); idr_destroy(&dev_priv->object_idr); kfree(dev_priv); diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c index 2b2f78c..9a43d98 100644 --- a/drivers/gpu/drm/sis/sis_mm.c +++ b/drivers/gpu/drm/sis/sis_mm.c @@ -128,17 +128,10 @@ static int sis_drm_alloc(struct drm_device *dev, struct drm_file *file, if (retval) goto fail_alloc; -again: - if (idr_pre_get(&dev_priv->object_idr, GFP_KERNEL) == 0) { - retval = -ENOMEM; - goto fail_idr; - } - - retval = idr_get_new_above(&dev_priv->object_idr, item, 1, &user_key); - if (retval == -EAGAIN) - goto again; - if (retval) + retval = idr_alloc(&dev_priv->object_idr, item, 1, 0, GFP_KERNEL); + if (retval < 0) goto fail_idr; + user_key = retval; list_add(&item->owner_list, &file_priv->obj_list); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/via/via_map.c b/drivers/gpu/drm/via/via_map.c index c0f1cc7..d0ab3fb 100644 --- a/drivers/gpu/drm/via/via_map.c +++ b/drivers/gpu/drm/via/via_map.c @@ -120,7 +120,6 @@ int via_driver_unload(struct drm_device *dev) { drm_via_private_t *dev_priv = dev->dev_private; - idr_remove_all(&dev_priv->object_idr); idr_destroy(&dev_priv->object_idr); kfree(dev_priv); diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c index 0d55432..0ab93ff 100644 --- a/drivers/gpu/drm/via/via_mm.c +++ b/drivers/gpu/drm/via/via_mm.c @@ -148,17 +148,10 @@ int via_mem_alloc(struct drm_device *dev, void *data, if (retval) goto fail_alloc; -again: - if (idr_pre_get(&dev_priv->object_idr, GFP_KERNEL) == 0) { - retval = -ENOMEM; - goto fail_idr; - } - - retval = idr_get_new_above(&dev_priv->object_idr, item, 1, &user_key); - if (retval == -EAGAIN) - goto again; - if (retval) + retval = idr_alloc(&dev_priv->object_idr, item, 1, 0, GFP_KERNEL); + if (retval < 0) goto fail_idr; + user_key = retval; list_add(&item->owner_list, &file_priv->obj_list); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 1655617..bc78425 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -177,17 +177,16 @@ int vmw_resource_alloc_id(struct vmw_resource *res) BUG_ON(res->id != -1); - do { - if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0)) - return -ENOMEM; - - write_lock(&dev_priv->resource_lock); - ret = idr_get_new_above(idr, res, 1, &res->id); - write_unlock(&dev_priv->resource_lock); + idr_preload(GFP_KERNEL); + write_lock(&dev_priv->resource_lock); - } while (ret == -EAGAIN); + ret = idr_alloc(idr, res, 1, 0, GFP_NOWAIT); + if (ret >= 0) + res->id = ret; - return ret; + write_unlock(&dev_priv->resource_lock); + idr_preload_end(); + return ret < 0 ? ret : 0; } /** diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 66a30f7..991d38d 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -935,25 +935,17 @@ out_list: */ int i2c_add_adapter(struct i2c_adapter *adapter) { - int id, res = 0; - -retry: - if (idr_pre_get(&i2c_adapter_idr, GFP_KERNEL) == 0) - return -ENOMEM; + int id; mutex_lock(&core_lock); - /* "above" here means "above or equal to", sigh */ - res = idr_get_new_above(&i2c_adapter_idr, adapter, - __i2c_first_dynamic_bus_num, &id); + id = idr_alloc(&i2c_adapter_idr, adapter, + __i2c_first_dynamic_bus_num, 0, GFP_KERNEL); mutex_unlock(&core_lock); - - if (res < 0) { - if (res == -EAGAIN) - goto retry; - return res; - } + if (id < 0) + return id; adapter->nr = id; + return i2c_register_adapter(adapter); } EXPORT_SYMBOL(i2c_add_adapter); @@ -984,33 +976,17 @@ EXPORT_SYMBOL(i2c_add_adapter); int i2c_add_numbered_adapter(struct i2c_adapter *adap) { int id; - int status; if (adap->nr == -1) /* -1 means dynamically assign bus id */ return i2c_add_adapter(adap); - if (adap->nr & ~MAX_IDR_MASK) - return -EINVAL; - -retry: - if (idr_pre_get(&i2c_adapter_idr, GFP_KERNEL) == 0) - return -ENOMEM; mutex_lock(&core_lock); - /* "above" here means "above or equal to", sigh; - * we need the "equal to" result to force the result - */ - status = idr_get_new_above(&i2c_adapter_idr, adap, adap->nr, &id); - if (status == 0 && id != adap->nr) { - status = -EBUSY; - idr_remove(&i2c_adapter_idr, id); - } + id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1, + GFP_KERNEL); mutex_unlock(&core_lock); - if (status == -EAGAIN) - goto retry; - - if (status == 0) - status = i2c_register_adapter(adap); - return status; + if (id < 0) + return id == -ENOSPC ? -EBUSY : id; + return i2c_register_adapter(adap); } EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 394fea2..784b97c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -382,20 +382,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; - int ret, id; + int id; static int next_id; - do { - spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, - next_id, &id); - if (!ret) - next_id = ((unsigned) id + 1) & MAX_IDR_MASK; - spin_unlock_irqrestore(&cm.lock, flags); - } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&cm.lock, flags); + + id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT); + if (id >= 0) + next_id = max(id + 1, 0); + + spin_unlock_irqrestore(&cm.lock, flags); + idr_preload_end(); cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; - return ret; + return id < 0 ? id : 0; } static void cm_free_id(__be32 local_id) @@ -3844,7 +3845,6 @@ static int __init ib_cm_init(void) cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); - idr_pre_get(&cm.local_id_table, GFP_KERNEL); INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d789eea..71c2c71 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2143,33 +2143,23 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; - int port, ret; + int ret; bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; - do { - ret = idr_get_new_above(ps, bind_list, snum, &port); - } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); - - if (ret) - goto err1; - - if (port != snum) { - ret = -EADDRNOTAVAIL; - goto err2; - } + ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL); + if (ret < 0) + goto err; bind_list->ps = ps; - bind_list->port = (unsigned short) port; + bind_list->port = (unsigned short)ret; cma_bind_port(bind_list, id_priv); return 0; -err2: - idr_remove(ps, port); -err1: +err: kfree(bind_list); - return ret; + return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; } static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) @@ -2214,10 +2204,9 @@ static int cma_check_port(struct rdma_bind_list *bind_list, { struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; - struct hlist_node *node; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { + hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 176c8f9..9f5ad7c 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -118,14 +118,13 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, { struct hlist_head *bucket; struct ib_pool_fmr *fmr; - struct hlist_node *pos; if (!pool->cache_bucket) return NULL; bucket = pool->cache_bucket + ib_fmr_hash(*page_list); - hlist_for_each_entry(fmr, pos, bucket, cache_node) + hlist_for_each_entry(fmr, bucket, cache_node) if (io_virtual_address == fmr->io_virtual_address && page_list_len == fmr->page_list_len && !memcmp(page_list, fmr->page_list, diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a8905ab..934f45e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -611,19 +611,21 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { + bool preload = gfp_mask & __GFP_WAIT; unsigned long flags; int ret, id; -retry: - if (!idr_pre_get(&query_idr, gfp_mask)) - return -ENOMEM; + if (preload) + idr_preload(gfp_mask); spin_lock_irqsave(&idr_lock, flags); - ret = idr_get_new(&query_idr, query, &id); + + id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); + spin_unlock_irqrestore(&idr_lock, flags); - if (ret == -EAGAIN) - goto retry; - if (ret) - return ret; + if (preload) + idr_preload_end(); + if (id < 0) + return id; query->mad_buf->timeout_ms = timeout_ms; query->mad_buf->context[0] = query; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 49b15ac..f2f6393 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -176,7 +176,6 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) { struct ib_ucm_context *ctx; - int result; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) @@ -187,17 +186,10 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) ctx->file = file; INIT_LIST_HEAD(&ctx->events); - do { - result = idr_pre_get(&ctx_id_table, GFP_KERNEL); - if (!result) - goto error; - - mutex_lock(&ctx_id_mutex); - result = idr_get_new(&ctx_id_table, ctx, &ctx->id); - mutex_unlock(&ctx_id_mutex); - } while (result == -EAGAIN); - - if (result) + mutex_lock(&ctx_id_mutex); + ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&ctx_id_mutex); + if (ctx->id < 0) goto error; list_add_tail(&ctx->file_list, &file->ctxs); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2709ff5..5ca44cd 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -145,7 +145,6 @@ static void ucma_put_ctx(struct ucma_context *ctx) static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) { struct ucma_context *ctx; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -156,17 +155,10 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; - do { - ret = idr_pre_get(&ctx_idr, GFP_KERNEL); - if (!ret) - goto error; - - mutex_lock(&mut); - ret = idr_get_new(&ctx_idr, ctx, &ctx->id); - mutex_unlock(&mut); - } while (ret == -EAGAIN); - - if (ret) + mutex_lock(&mut); + ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&mut); + if (ctx->id < 0) goto error; list_add_tail(&ctx->list, &file->ctx_list); @@ -180,23 +172,15 @@ error: static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc; - int ret; mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; - do { - ret = idr_pre_get(&multicast_idr, GFP_KERNEL); - if (!ret) - goto error; - - mutex_lock(&mut); - ret = idr_get_new(&multicast_idr, mc, &mc->id); - mutex_unlock(&mut); - } while (ret == -EAGAIN); - - if (ret) + mutex_lock(&mut); + mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); + mutex_unlock(&mut); + if (mc->id < 0) goto error; mc->ctx = ctx; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e71d834..a7d00f6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -125,18 +125,17 @@ static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) { int ret; -retry: - if (!idr_pre_get(idr, GFP_KERNEL)) - return -ENOMEM; - + idr_preload(GFP_KERNEL); spin_lock(&ib_uverbs_idr_lock); - ret = idr_get_new(idr, uobj, &uobj->id); - spin_unlock(&ib_uverbs_idr_lock); - if (ret == -EAGAIN) - goto retry; + ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); + if (ret >= 0) + uobj->id = ret; - return ret; + spin_unlock(&ib_uverbs_idr_lock); + idr_preload_end(); + + return ret < 0 ? ret : 0; } void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c index 28cd5cb..0ab826b 100644 --- a/drivers/infiniband/hw/amso1100/c2_qp.c +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -382,14 +382,17 @@ static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp) { int ret; - do { - spin_lock_irq(&c2dev->qp_table.lock); - ret = idr_get_new_above(&c2dev->qp_table.idr, qp, - c2dev->qp_table.last++, &qp->qpn); - spin_unlock_irq(&c2dev->qp_table.lock); - } while ((ret == -EAGAIN) && - idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL)); - return ret; + idr_preload(GFP_KERNEL); + spin_lock_irq(&c2dev->qp_table.lock); + + ret = idr_alloc(&c2dev->qp_table.idr, qp, c2dev->qp_table.last++, 0, + GFP_NOWAIT); + if (ret >= 0) + qp->qpn = ret; + + spin_unlock_irq(&c2dev->qp_table.lock); + idr_preload_end(); + return ret < 0 ? ret : 0; } static void c2_free_qpn(struct c2_dev *c2dev, int qpn) diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index a1c4457..8378622 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -153,19 +153,17 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, void *handle, u32 id) { int ret; - int newid; - - do { - if (!idr_pre_get(idr, GFP_KERNEL)) { - return -ENOMEM; - } - spin_lock_irq(&rhp->lock); - ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(newid != id); - spin_unlock_irq(&rhp->lock); - } while (ret == -EAGAIN); - - return ret; + + idr_preload(GFP_KERNEL); + spin_lock_irq(&rhp->lock); + + ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT); + + spin_unlock_irq(&rhp->lock); + idr_preload_end(); + + BUG_ON(ret == -ENOSPC); + return ret < 0 ? ret : 0; } static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4c07fc0..7eec5e1 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -260,20 +260,21 @@ static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id, int lock) { int ret; - int newid; - do { - if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC)) - return -ENOMEM; - if (lock) - spin_lock_irq(&rhp->lock); - ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(!ret && newid != id); - if (lock) - spin_unlock_irq(&rhp->lock); - } while (ret == -EAGAIN); - - return ret; + if (lock) { + idr_preload(GFP_KERNEL); + spin_lock_irq(&rhp->lock); + } + + ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC); + + if (lock) { + spin_unlock_irq(&rhp->lock); + idr_preload_end(); + } + + BUG_ON(ret == -ENOSPC); + return ret < 0 ? ret : 0; } static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 8f52901..212150c 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -128,7 +128,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, void *vpage; u32 counter; u64 rpage, cqx_fec, h_ret; - int ipz_rc, ret, i; + int ipz_rc, i; unsigned long flags; if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) @@ -163,32 +163,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, adapter_handle = shca->ipz_hca_handle; param.eq_handle = shca->eq.ipz_eq_handle; - do { - if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't reserve idr nr. device=%p", - device); - goto create_cq_exit1; - } - - write_lock_irqsave(&ehca_cq_idr_lock, flags); - ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - } while (ret == -EAGAIN); + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_cq_idr_lock, flags); + my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + idr_preload_end(); - if (ret) { + if (my_cq->token < 0) { cq = ERR_PTR(-ENOMEM); ehca_err(device, "Can't allocate new idr entry. device=%p", device); goto create_cq_exit1; } - if (my_cq->token > 0x1FFFFFF) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Invalid number of cq. device=%p", device); - goto create_cq_exit2; - } - /* * CQs maximum depth is 4GB-64, but we need additional 20 as buffer * for receiving errors CQEs. diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 1493939..00d6861 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -636,30 +636,26 @@ static struct ehca_qp *internal_create_qp( my_qp->send_cq = container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - do { - if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't reserve idr resources."); - goto create_qp_exit0; - } + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_qp_idr_lock, flags); - write_lock_irqsave(&ehca_qp_idr_lock, flags); - ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - } while (ret == -EAGAIN); + ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); + if (ret >= 0) + my_qp->token = ret; - if (ret) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + idr_preload_end(); + if (ret < 0) { + if (ret == -ENOSPC) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid number of qp"); + } else { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + } goto create_qp_exit0; } - if (my_qp->token > 0x1FFFFFF) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid number of qp"); - goto create_qp_exit1; - } - if (has_srq) parms.srq_token = my_qp->token; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 7b371f5..bd0caed 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -194,11 +194,6 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) struct ipath_devdata *dd; int ret; - if (!idr_pre_get(&unit_table, GFP_KERNEL)) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd = vzalloc(sizeof(*dd)); if (!dd) { dd = ERR_PTR(-ENOMEM); @@ -206,9 +201,10 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) } dd->ipath_unit = -1; + idr_preload(GFP_KERNEL); spin_lock_irqsave(&ipath_devs_lock, flags); - ret = idr_get_new(&unit_table, dd, &dd->ipath_unit); + ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT); if (ret < 0) { printk(KERN_ERR IPATH_DRV_NAME ": Could not allocate unit ID: error %d\n", -ret); @@ -216,6 +212,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) dd = ERR_PTR(ret); goto bail_unlock; } + dd->ipath_unit = ret; dd->pcidev = pdev; pci_set_drvdata(pdev, dd); @@ -224,7 +221,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) bail_unlock: spin_unlock_irqrestore(&ipath_devs_lock, flags); - + idr_preload_end(); bail: return dd; } @@ -2503,11 +2500,6 @@ static int __init infinipath_init(void) * the PCI subsystem. */ idr_init(&unit_table); - if (!idr_pre_get(&unit_table, GFP_KERNEL)) { - printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n"); - ret = -ENOMEM; - goto bail; - } ret = pci_register_driver(&ipath_driver); if (ret < 0) { diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index dbc99d4..e0d79b2 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -203,7 +203,7 @@ static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new) static struct id_map_entry * id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) { - int ret, id; + int ret; static int next_id; struct id_map_entry *ent; struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; @@ -220,25 +220,23 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) ent->dev = to_mdev(ibdev); INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout); - do { - spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); - ret = idr_get_new_above(&sriov->pv_id_table, ent, - next_id, &id); - if (!ret) { - next_id = ((unsigned) id + 1) & MAX_IDR_MASK; - ent->pv_cm_id = (u32)id; - sl_id_map_add(ibdev, ent); - } + idr_preload(GFP_KERNEL); + spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); - spin_unlock(&sriov->id_map_lock); - } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL)); - /*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/ - if (!ret) { - spin_lock(&sriov->id_map_lock); + ret = idr_alloc(&sriov->pv_id_table, ent, next_id, 0, GFP_NOWAIT); + if (ret >= 0) { + next_id = max(ret + 1, 0); + ent->pv_cm_id = (u32)ret; + sl_id_map_add(ibdev, ent); list_add_tail(&ent->list, &sriov->cm_list); - spin_unlock(&sriov->id_map_lock); - return ent; } + + spin_unlock(&sriov->id_map_lock); + idr_preload_end(); + + if (ret >= 0) + return ent; + /*error flow*/ kfree(ent); mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index c4e0131..48928c8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -51,18 +51,6 @@ static DEFINE_IDR(ocrdma_dev_id); static union ib_gid ocrdma_zero_sgid; -static int ocrdma_get_instance(void) -{ - int instance = 0; - - /* Assign an unused number */ - if (!idr_pre_get(&ocrdma_dev_id, GFP_KERNEL)) - return -1; - if (idr_get_new(&ocrdma_dev_id, NULL, &instance)) - return -1; - return instance; -} - void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) { u8 mac_addr[6]; @@ -416,7 +404,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) goto idr_err; memcpy(&dev->nic_info, dev_info, sizeof(*dev_info)); - dev->id = ocrdma_get_instance(); + dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL); if (dev->id < 0) goto idr_err; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index ddf066d..50e33aa 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1060,22 +1060,23 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) struct qib_devdata *dd; int ret; - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); if (!dd) { dd = ERR_PTR(-ENOMEM); goto bail; } + idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); - ret = idr_get_new(&qib_unit_table, dd, &dd->unit); - if (ret >= 0) + + ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); + if (ret >= 0) { + dd->unit = ret; list_add(&dd->list, &qib_dev_list); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + idr_preload_end(); if (ret < 0) { qib_early_err(&pdev->dev, @@ -1180,11 +1181,6 @@ static int __init qlogic_ib_init(void) * the PCI subsystem. */ idr_init(&qib_unit_table); - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - pr_err("idr_pre_get() failed\n"); - ret = -ENOMEM; - goto bail_cq_wq; - } ret = pci_register_driver(&qib_driver); if (ret < 0) { @@ -1199,7 +1195,6 @@ static int __init qlogic_ib_init(void) bail_unit: idr_destroy(&qib_unit_table); -bail_cq_wq: destroy_workqueue(qib_cq_wq); bail_dev: qib_dev_cleanup(); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index abe2d69..8b07f83 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -483,7 +483,6 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; struct sock *sk = sock->sk; - struct hlist_node *node; struct sock *csk; int err = 0; @@ -508,7 +507,7 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (sk->sk_protocol < ISDN_P_B_START) { read_lock_bh(&data_sockets.lock); - sk_for_each(csk, node, &data_sockets.head) { + sk_for_each(csk, &data_sockets.head) { if (sk == csk) continue; if (_pms(csk)->dev != _pms(sk)->dev) diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index deda591..9cb4b62 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -64,12 +64,11 @@ unlock: static void send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb) { - struct hlist_node *node; struct sock *sk; struct sk_buff *cskb = NULL; read_lock(&sl->lock); - sk_for_each(sk, node, &sl->head) { + sk_for_each(sk, &sl->head) { if (sk->sk_state != MISDN_BOUND) continue; if (!cskb) diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index aefb78e..d9d3f1c 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -106,9 +106,8 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, struct dm_cell_key *key) { struct dm_bio_prison_cell *cell; - struct hlist_node *tmp; - hlist_for_each_entry(cell, tmp, bucket, list) + hlist_for_each_entry(cell, bucket, list) if (keys_equal(&cell->key, key)) return cell; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 651ca79..93205e3 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -859,9 +859,8 @@ static void __check_watermark(struct dm_bufio_client *c) static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) { struct dm_buffer *b; - struct hlist_node *hn; - hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], + hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) { dm_bufio_cond_resched(); if (b->block == block) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 59fc18a..10079e0 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -227,12 +227,11 @@ static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) { struct dm_snap_tracked_chunk *c; - struct hlist_node *hn; int found = 0; spin_lock_irq(&s->tracked_chunk_lock); - hlist_for_each_entry(c, hn, + hlist_for_each_entry(c, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { if (c->chunk == chunk) { found = 1; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 314a0e2..bb2cd3c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -318,7 +318,6 @@ static void __exit dm_exit(void) /* * Should be empty by this point. */ - idr_remove_all(&_minor_idr); idr_destroy(&_minor_idr); } @@ -627,7 +626,6 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - trace_block_bio_complete(md->queue, bio, io_error); bio_endio(bio, io_error); } } @@ -1756,62 +1754,38 @@ static void free_minor(int minor) */ static int specific_minor(int minor) { - int r, m; + int r; if (minor >= (1 << MINORBITS)) return -EINVAL; - r = idr_pre_get(&_minor_idr, GFP_KERNEL); - if (!r) - return -ENOMEM; - + idr_preload(GFP_KERNEL); spin_lock(&_minor_lock); - if (idr_find(&_minor_idr, minor)) { - r = -EBUSY; - goto out; - } - - r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); - if (r) - goto out; + r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT); - if (m != minor) { - idr_remove(&_minor_idr, m); - r = -EBUSY; - goto out; - } - -out: spin_unlock(&_minor_lock); - return r; + idr_preload_end(); + if (r < 0) + return r == -ENOSPC ? -EBUSY : r; + return 0; } static int next_free_minor(int *minor) { - int r, m; - - r = idr_pre_get(&_minor_idr, GFP_KERNEL); - if (!r) - return -ENOMEM; + int r; + idr_preload(GFP_KERNEL); spin_lock(&_minor_lock); - r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); - if (r) - goto out; - - if (m >= (1 << MINORBITS)) { - idr_remove(&_minor_idr, m); - r = -ENOSPC; - goto out; - } - - *minor = m; + r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT); -out: spin_unlock(&_minor_lock); - return r; + idr_preload_end(); + if (r < 0) + return r; + *minor = r; + return 0; } static const struct block_device_operations dm_blk_dops; diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 7b17a1f..81da1a2 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -46,10 +46,9 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b) int r = 0; unsigned bucket = dm_hash_block(b, DM_HASH_MASK); struct shadow_info *si; - struct hlist_node *n; spin_lock(&tm->lock); - hlist_for_each_entry(si, n, tm->buckets + bucket, hlist) + hlist_for_each_entry(si, tm->buckets + bucket, hlist) if (si->where == b) { r = 1; break; @@ -81,14 +80,14 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b) static void wipe_shadow_table(struct dm_transaction_manager *tm) { struct shadow_info *si; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct hlist_head *bucket; int i; spin_lock(&tm->lock); for (i = 0; i < DM_HASH_SIZE; i++) { bucket = tm->buckets + i; - hlist_for_each_entry_safe(si, n, tmp, bucket, hlist) + hlist_for_each_entry_safe(si, tmp, bucket, hlist) kfree(si); INIT_HLIST_HEAD(bucket); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 19d77a0..5af2d27 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -184,8 +184,6 @@ static void return_io(struct bio *return_bi) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); bi = return_bi; } @@ -365,10 +363,9 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, short generation) { struct stripe_head *sh; - struct hlist_node *hn; pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); - hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) + hlist_for_each_entry(sh, stripe_hash(conf, sector), hash) if (sh->sector == sector && sh->generation == generation) return sh; pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); @@ -3917,8 +3914,6 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); if (!error && uptodate) { - trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), - raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); @@ -4377,8 +4372,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ( rw == WRITE ) md_write_end(mddev); - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); } } @@ -4755,11 +4748,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) handled++; } remaining = raid5_dec_bi_active_stripes(raid_bio); - if (remaining == 0) { - trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), - raid_bio, 0); + if (remaining == 0) bio_endio(raid_bio, 0); - } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); return handled; diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 383a727..6e5ad8e 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1338,28 +1338,15 @@ static int isp_enable_clocks(struct isp_device *isp) { int r; unsigned long rate; - int divisor; - - /* - * cam_mclk clock chain: - * dpll4 -> dpll4_m5 -> dpll4_m5x2 -> cam_mclk - * - * In OMAP3630 dpll4_m5x2 != 2 x dpll4_m5 but both are - * set to the same value. Hence the rate set for dpll4_m5 - * has to be twice of what is set on OMAP3430 to get - * the required value for cam_mclk - */ - divisor = isp->revision == ISP_REVISION_15_0 ? 1 : 2; r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_ICK]); if (r) { dev_err(isp->dev, "failed to enable cam_ick clock\n"); goto out_clk_enable_ick; } - r = clk_set_rate(isp->clock[ISP_CLK_DPLL4_M5_CK], - CM_CAM_MCLK_HZ/divisor); + r = clk_set_rate(isp->clock[ISP_CLK_CAM_MCLK], CM_CAM_MCLK_HZ); if (r) { - dev_err(isp->dev, "clk_set_rate for dpll4_m5_ck failed\n"); + dev_err(isp->dev, "clk_set_rate for cam_mclk failed\n"); goto out_clk_enable_mclk; } r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_MCLK]); @@ -1401,7 +1388,6 @@ static void isp_disable_clocks(struct isp_device *isp) static const char *isp_clocks[] = { "cam_ick", "cam_mclk", - "dpll4_m5_ck", "csi2_96m_fck", "l3_ick", }; diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h index 517d348..c77e1f2 100644 --- a/drivers/media/platform/omap3isp/isp.h +++ b/drivers/media/platform/omap3isp/isp.h @@ -147,7 +147,6 @@ struct isp_platform_callback { * @ref_count: Reference count for handling multiple ISP requests. * @cam_ick: Pointer to camera interface clock structure. * @cam_mclk: Pointer to camera functional clock structure. - * @dpll4_m5_ck: Pointer to DPLL4 M5 clock structure. * @csi2_fck: Pointer to camera CSI2 complexIO clock structure. * @l3_ick: Pointer to OMAP3 L3 bus interface clock. * @irq: Currently attached ISP ISR callbacks information structure. @@ -189,10 +188,9 @@ struct isp_device { u32 xclk_divisor[2]; /* Two clocks, a and b. */ #define ISP_CLK_CAM_ICK 0 #define ISP_CLK_CAM_MCLK 1 -#define ISP_CLK_DPLL4_M5_CK 2 -#define ISP_CLK_CSI2_FCK 3 -#define ISP_CLK_L3_ICK 4 - struct clk *clock[5]; +#define ISP_CLK_CSI2_FCK 2 +#define ISP_CLK_L3_ICK 3 + struct clk *clock[4]; /* ISP modules */ struct ispstat isp_af; diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 56ff19c..ffcb10a 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -512,18 +512,17 @@ int memstick_add_host(struct memstick_host *host) { int rc; - while (1) { - if (!idr_pre_get(&memstick_host_idr, GFP_KERNEL)) - return -ENOMEM; + idr_preload(GFP_KERNEL); + spin_lock(&memstick_host_lock); - spin_lock(&memstick_host_lock); - rc = idr_get_new(&memstick_host_idr, host, &host->id); - spin_unlock(&memstick_host_lock); - if (!rc) - break; - else if (rc != -EAGAIN) - return rc; - } + rc = idr_alloc(&memstick_host_idr, host, 0, 0, GFP_NOWAIT); + if (rc >= 0) + host->id = rc; + + spin_unlock(&memstick_host_lock); + idr_preload_end(); + if (rc < 0) + return rc; dev_set_name(&host->dev, "memstick%u", host->id); diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 9729b92..f12b78d 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1213,21 +1213,10 @@ static int mspro_block_init_disk(struct memstick_dev *card) msb->page_size = be16_to_cpu(sys_info->unit_size); mutex_lock(&mspro_block_disk_lock); - if (!idr_pre_get(&mspro_block_disk_idr, GFP_KERNEL)) { - mutex_unlock(&mspro_block_disk_lock); - return -ENOMEM; - } - - rc = idr_get_new(&mspro_block_disk_idr, card, &disk_id); + disk_id = idr_alloc(&mspro_block_disk_idr, card, 0, 256, GFP_KERNEL); mutex_unlock(&mspro_block_disk_lock); - - if (rc) - return rc; - - if ((disk_id << MSPRO_BLOCK_PART_SHIFT) > 255) { - rc = -ENOSPC; - goto out_release_id; - } + if (disk_id < 0) + return disk_id; msb->disk = alloc_disk(1 << MSPRO_BLOCK_PART_SHIFT); if (!msb->disk) { diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index 29b2172..a7c5b31 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -454,7 +454,7 @@ static int r592_transfer_fifo_pio(struct r592_device *dev) /* Executes one TPC (data is read/written from small or large fifo) */ static void r592_execute_tpc(struct r592_device *dev) { - bool is_write = dev->req->tpc >= MS_TPC_SET_RW_REG_ADRS; + bool is_write; int len, error; u32 status, reg; @@ -463,6 +463,7 @@ static void r592_execute_tpc(struct r592_device *dev) return; } + is_write = dev->req->tpc >= MS_TPC_SET_RW_REG_ADRS; len = dev->req->long_data ? dev->req->sg.length : dev->req->data_len; diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 481a98a..2f12cc1 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -1091,15 +1091,14 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, } handle->pcr = pcr; - if (!idr_pre_get(&rtsx_pci_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto free_handle; - } - + idr_preload(GFP_KERNEL); spin_lock(&rtsx_pci_lock); - ret = idr_get_new(&rtsx_pci_idr, pcr, &pcr->id); + ret = idr_alloc(&rtsx_pci_idr, pcr, 0, 0, GFP_NOWAIT); + if (ret >= 0) + pcr->id = ret; spin_unlock(&rtsx_pci_lock); - if (ret) + idr_preload_end(); + if (ret < 0) goto free_handle; pcr->pci = pcidev; diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c index f428d86..f32550a 100644 --- a/drivers/misc/c2port/core.c +++ b/drivers/misc/c2port/core.c @@ -885,7 +885,7 @@ struct c2port_device *c2port_device_register(char *name, struct c2port_ops *ops, void *devdata) { struct c2port_device *c2dev; - int id, ret; + int ret; if (unlikely(!ops) || unlikely(!ops->access) || \ unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \ @@ -897,22 +897,18 @@ struct c2port_device *c2port_device_register(char *name, if (unlikely(!c2dev)) return ERR_PTR(-ENOMEM); - ret = idr_pre_get(&c2port_idr, GFP_KERNEL); - if (!ret) { - ret = -ENOMEM; - goto error_idr_get_new; - } - + idr_preload(GFP_KERNEL); spin_lock_irq(&c2port_idr_lock); - ret = idr_get_new(&c2port_idr, c2dev, &id); + ret = idr_alloc(&c2port_idr, c2dev, 0, 0, GFP_NOWAIT); spin_unlock_irq(&c2port_idr_lock); + idr_preload_end(); if (ret < 0) - goto error_idr_get_new; - c2dev->id = id; + goto error_idr_alloc; + c2dev->id = ret; c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, - "c2port%d", id); + "c2port%d", c2dev->id); if (unlikely(IS_ERR(c2dev->dev))) { ret = PTR_ERR(c2dev->dev); goto error_device_create; @@ -946,10 +942,10 @@ error_device_create_bin_file: error_device_create: spin_lock_irq(&c2port_idr_lock); - idr_remove(&c2port_idr, id); + idr_remove(&c2port_idr, c2dev->id); spin_unlock_irq(&c2port_idr_lock); -error_idr_get_new: +error_idr_alloc: kfree(c2dev); return ERR_PTR(ret); diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 240a6d3..2129274 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -280,11 +280,10 @@ static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, const struct mmu_notifier_ops *ops) { struct mmu_notifier *mn, *gru_mn = NULL; - struct hlist_node *n; if (mm->mmu_notifier_mm) { rcu_read_lock(); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) if (mn->ops == ops) { gru_mn = mn; diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c index 0bd5349..0ab7c92 100644 --- a/drivers/misc/tifm_core.c +++ b/drivers/misc/tifm_core.c @@ -196,13 +196,14 @@ int tifm_add_adapter(struct tifm_adapter *fm) { int rc; - if (!idr_pre_get(&tifm_adapter_idr, GFP_KERNEL)) - return -ENOMEM; - + idr_preload(GFP_KERNEL); spin_lock(&tifm_adapter_lock); - rc = idr_get_new(&tifm_adapter_idr, fm, &fm->id); + rc = idr_alloc(&tifm_adapter_idr, fm, 0, 0, GFP_NOWAIT); + if (rc >= 0) + fm->id = rc; spin_unlock(&tifm_adapter_lock); - if (rc) + idr_preload_end(); + if (rc < 0) return rc; dev_set_name(&fm->dev, "tifm%u", fm->id); diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c index c3e8397..a8cee33 100644 --- a/drivers/misc/vmw_vmci/vmci_doorbell.c +++ b/drivers/misc/vmw_vmci/vmci_doorbell.c @@ -127,9 +127,8 @@ static struct dbell_entry *dbell_index_table_find(u32 idx) { u32 bucket = VMCI_DOORBELL_HASH(idx); struct dbell_entry *dbell; - struct hlist_node *node; - hlist_for_each_entry(dbell, node, &vmci_doorbell_it.entries[bucket], + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) { if (idx == dbell->idx) return dbell; @@ -359,12 +358,10 @@ static void dbell_fire_entries(u32 notify_idx) { u32 bucket = VMCI_DOORBELL_HASH(notify_idx); struct dbell_entry *dbell; - struct hlist_node *node; spin_lock_bh(&vmci_doorbell_it.lock); - hlist_for_each_entry(dbell, node, - &vmci_doorbell_it.entries[bucket], node) { + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) { if (dbell->idx == notify_idx && atomic_read(&dbell->active) == 1) { if (dbell->run_delayed) { diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c index a196f84..9a53a30 100644 --- a/drivers/misc/vmw_vmci/vmci_resource.c +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -46,11 +46,10 @@ static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle, enum vmci_resource_type type) { struct vmci_resource *r, *resource = NULL; - struct hlist_node *node; unsigned int idx = vmci_resource_hash(handle); rcu_read_lock(); - hlist_for_each_entry_rcu(r, node, + hlist_for_each_entry_rcu(r, &vmci_resource_table.entries[idx], node) { u32 cid = r->handle.context; u32 rid = r->handle.resource; @@ -146,12 +145,11 @@ void vmci_resource_remove(struct vmci_resource *resource) struct vmci_handle handle = resource->handle; unsigned int idx = vmci_resource_hash(handle); struct vmci_resource *r; - struct hlist_node *node; /* Remove resource from hash table. */ spin_lock(&vmci_resource_table.lock); - hlist_for_each_entry(r, node, &vmci_resource_table.entries[idx], node) { + hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) { if (vmci_handle_is_equal(r->handle, resource->handle)) { hlist_del_init_rcu(&r->node); break; diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 821cd82..2a3593d 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -429,19 +429,20 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) int err; struct mmc_host *host; - if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL)) - return NULL; - host = kzalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); if (!host) return NULL; /* scanning will be enabled when we're ready */ host->rescan_disable = 1; + idr_preload(GFP_KERNEL); spin_lock(&mmc_host_lock); - err = idr_get_new(&mmc_host_idr, host, &host->index); + err = idr_alloc(&mmc_host_idr, host, 0, 0, GFP_NOWAIT); + if (err >= 0) + host->index = err; spin_unlock(&mmc_host_lock); - if (err) + idr_preload_end(); + if (err < 0) goto free; dev_set_name(&host->class_dev, "mmc%d", host->index); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 60063cc..9834221 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1453,7 +1453,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host) if (!sg_miter_next(sg_miter)) goto done; - host->sg = sg_miter->__sg; + host->sg = sg_miter->piter.sg; buf = sg_miter->addr; remain = sg_miter->length; offset = 0; @@ -1508,7 +1508,7 @@ static void dw_mci_write_data_pio(struct dw_mci *host) if (!sg_miter_next(sg_miter)) goto done; - host->sg = sg_miter->__sg; + host->sg = sg_miter->piter.sg; buf = sg_miter->addr; remain = sg_miter->length; offset = 0; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index ec794a7..61d5f56 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -349,13 +349,8 @@ int add_mtd_device(struct mtd_info *mtd) BUG_ON(mtd->writesize == 0); mutex_lock(&mtd_table_mutex); - do { - if (!idr_pre_get(&mtd_idr, GFP_KERNEL)) - goto fail_locked; - error = idr_get_new(&mtd_idr, mtd, &i); - } while (error == -EAGAIN); - - if (error) + i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); + if (i < 0) goto fail_locked; mtd->index = i; diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c index 1eee264..db2f22e 100644 --- a/drivers/mtd/tests/mtd_nandecctest.c +++ b/drivers/mtd/tests/mtd_nandecctest.c @@ -256,7 +256,7 @@ static int nand_ecc_test_run(const size_t size) goto error; } - get_random_bytes(correct_data, size); + prandom_bytes(correct_data, size); __nand_calculate_ecc(correct_data, size, correct_ecc); for (i = 0; i < ARRAY_SIZE(nand_ecc_test); i++) { diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c index e827fa8..3e24b37 100644 --- a/drivers/mtd/tests/mtd_oobtest.c +++ b/drivers/mtd/tests/mtd_oobtest.c @@ -29,6 +29,7 @@ #include <linux/mtd/mtd.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/random.h> static int dev = -EINVAL; module_param(dev, int, S_IRUGO); @@ -46,26 +47,7 @@ static int use_offset; static int use_len; static int use_len_max; static int vary_offset; -static unsigned long next = 1; - -static inline unsigned int simple_rand(void) -{ - next = next * 1103515245 + 12345; - return (unsigned int)((next / 65536) % 32768); -} - -static inline void simple_srand(unsigned long seed) -{ - next = seed; -} - -static void set_random_data(unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - buf[i] = simple_rand(); -} +static struct rnd_state rnd_state; static int erase_eraseblock(int ebnum) { @@ -129,7 +111,7 @@ static int write_eraseblock(int ebnum) loff_t addr = ebnum * mtd->erasesize; for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { - set_random_data(writebuf, use_len); + prandom_bytes_state(&rnd_state, writebuf, use_len); ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; @@ -182,7 +164,7 @@ static int verify_eraseblock(int ebnum) loff_t addr = ebnum * mtd->erasesize; for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { - set_random_data(writebuf, use_len); + prandom_bytes_state(&rnd_state, writebuf, use_len); ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; @@ -273,7 +255,7 @@ static int verify_eraseblock_in_one_go(int ebnum) loff_t addr = ebnum * mtd->erasesize; size_t len = mtd->ecclayout->oobavail * pgcnt; - set_random_data(writebuf, len); + prandom_bytes_state(&rnd_state, writebuf, len); ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; @@ -424,12 +406,12 @@ static int __init mtd_oobtest_init(void) if (err) goto out; - simple_srand(1); + prandom_seed_state(&rnd_state, 1); err = write_whole_device(); if (err) goto out; - simple_srand(1); + prandom_seed_state(&rnd_state, 1); err = verify_all_eraseblocks(); if (err) goto out; @@ -444,13 +426,13 @@ static int __init mtd_oobtest_init(void) if (err) goto out; - simple_srand(3); + prandom_seed_state(&rnd_state, 3); err = write_whole_device(); if (err) goto out; /* Check all eraseblocks */ - simple_srand(3); + prandom_seed_state(&rnd_state, 3); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) @@ -479,7 +461,7 @@ static int __init mtd_oobtest_init(void) use_len = mtd->ecclayout->oobavail; use_len_max = mtd->ecclayout->oobavail; vary_offset = 1; - simple_srand(5); + prandom_seed_state(&rnd_state, 5); err = write_whole_device(); if (err) @@ -490,7 +472,7 @@ static int __init mtd_oobtest_init(void) use_len = mtd->ecclayout->oobavail; use_len_max = mtd->ecclayout->oobavail; vary_offset = 1; - simple_srand(5); + prandom_seed_state(&rnd_state, 5); err = verify_all_eraseblocks(); if (err) goto out; @@ -649,7 +631,7 @@ static int __init mtd_oobtest_init(void) goto out; /* Write all eraseblocks */ - simple_srand(11); + prandom_seed_state(&rnd_state, 11); pr_info("writing OOBs of whole device\n"); for (i = 0; i < ebcnt - 1; ++i) { int cnt = 2; @@ -659,7 +641,7 @@ static int __init mtd_oobtest_init(void) continue; addr = (i + 1) * mtd->erasesize - mtd->writesize; for (pg = 0; pg < cnt; ++pg) { - set_random_data(writebuf, sz); + prandom_bytes_state(&rnd_state, writebuf, sz); ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; @@ -680,12 +662,13 @@ static int __init mtd_oobtest_init(void) pr_info("written %u eraseblocks\n", i); /* Check all eraseblocks */ - simple_srand(11); + prandom_seed_state(&rnd_state, 11); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt - 1; ++i) { if (bbt[i] || bbt[i + 1]) continue; - set_random_data(writebuf, mtd->ecclayout->oobavail * 2); + prandom_bytes_state(&rnd_state, writebuf, + mtd->ecclayout->oobavail * 2); addr = (i + 1) * mtd->erasesize - mtd->writesize; ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c index f93a76f..0c1140b 100644 --- a/drivers/mtd/tests/mtd_pagetest.c +++ b/drivers/mtd/tests/mtd_pagetest.c @@ -29,6 +29,7 @@ #include <linux/mtd/mtd.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/random.h> static int dev = -EINVAL; module_param(dev, int, S_IRUGO); @@ -45,26 +46,7 @@ static int bufsize; static int ebcnt; static int pgcnt; static int errcnt; -static unsigned long next = 1; - -static inline unsigned int simple_rand(void) -{ - next = next * 1103515245 + 12345; - return (unsigned int)((next / 65536) % 32768); -} - -static inline void simple_srand(unsigned long seed) -{ - next = seed; -} - -static void set_random_data(unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - buf[i] = simple_rand(); -} +static struct rnd_state rnd_state; static int erase_eraseblock(int ebnum) { @@ -98,7 +80,7 @@ static int write_eraseblock(int ebnum) size_t written; loff_t addr = ebnum * mtd->erasesize; - set_random_data(writebuf, mtd->erasesize); + prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize); cond_resched(); err = mtd_write(mtd, addr, mtd->erasesize, &written, writebuf); if (err || written != mtd->erasesize) @@ -124,7 +106,7 @@ static int verify_eraseblock(int ebnum) for (i = 0; i < ebcnt && bbt[ebcnt - i - 1]; ++i) addrn -= mtd->erasesize; - set_random_data(writebuf, mtd->erasesize); + prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize); for (j = 0; j < pgcnt - 1; ++j, addr += pgsize) { /* Do a read to set the internal dataRAMs to different data */ err = mtd_read(mtd, addr0, bufsize, &read, twopages); @@ -160,7 +142,8 @@ static int verify_eraseblock(int ebnum) } /* Check boundary between eraseblocks */ if (addr <= addrn - pgsize - pgsize && !bbt[ebnum + 1]) { - unsigned long oldnext = next; + struct rnd_state old_state = rnd_state; + /* Do a read to set the internal dataRAMs to different data */ err = mtd_read(mtd, addr0, bufsize, &read, twopages); if (mtd_is_bitflip(err)) @@ -188,13 +171,13 @@ static int verify_eraseblock(int ebnum) return err; } memcpy(boundary, writebuf + mtd->erasesize - pgsize, pgsize); - set_random_data(boundary + pgsize, pgsize); + prandom_bytes_state(&rnd_state, boundary + pgsize, pgsize); if (memcmp(twopages, boundary, bufsize)) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; } - next = oldnext; + rnd_state = old_state; } return err; } @@ -326,7 +309,7 @@ static int erasecrosstest(void) return err; pr_info("writing 1st page of block %d\n", ebnum); - set_random_data(writebuf, pgsize); + prandom_bytes_state(&rnd_state, writebuf, pgsize); strcpy(writebuf, "There is no data like this!"); err = mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { @@ -359,7 +342,7 @@ static int erasecrosstest(void) return err; pr_info("writing 1st page of block %d\n", ebnum); - set_random_data(writebuf, pgsize); + prandom_bytes_state(&rnd_state, writebuf, pgsize); strcpy(writebuf, "There is no data like this!"); err = mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { @@ -417,7 +400,7 @@ static int erasetest(void) return err; pr_info("writing 1st page of block %d\n", ebnum); - set_random_data(writebuf, pgsize); + prandom_bytes_state(&rnd_state, writebuf, pgsize); err = mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { pr_err("error: write failed at %#llx\n", @@ -565,7 +548,7 @@ static int __init mtd_pagetest_init(void) pr_info("erased %u eraseblocks\n", i); /* Write all eraseblocks */ - simple_srand(1); + prandom_seed_state(&rnd_state, 1); pr_info("writing whole device\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) @@ -580,7 +563,7 @@ static int __init mtd_pagetest_init(void) pr_info("written %u eraseblocks\n", i); /* Check all eraseblocks */ - simple_srand(1); + prandom_seed_state(&rnd_state, 1); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c index 596cbea..a6ce9c1 100644 --- a/drivers/mtd/tests/mtd_speedtest.c +++ b/drivers/mtd/tests/mtd_speedtest.c @@ -49,13 +49,6 @@ static int pgcnt; static int goodebcnt; static struct timeval start, finish; -static void set_random_data(unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - buf[i] = random32(); -} static int erase_eraseblock(int ebnum) { @@ -396,7 +389,7 @@ static int __init mtd_speedtest_init(void) goto out; } - set_random_data(iobuf, mtd->erasesize); + prandom_bytes(iobuf, mtd->erasesize); err = scan_for_bad_eraseblocks(); if (err) diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c index 3729f67..2d7e6cf 100644 --- a/drivers/mtd/tests/mtd_stresstest.c +++ b/drivers/mtd/tests/mtd_stresstest.c @@ -282,8 +282,7 @@ static int __init mtd_stresstest_init(void) } for (i = 0; i < ebcnt; i++) offsets[i] = mtd->erasesize; - for (i = 0; i < bufsize; i++) - writebuf[i] = random32(); + prandom_bytes(writebuf, bufsize); err = scan_for_bad_eraseblocks(); if (err) diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c index c880c22..aade56f 100644 --- a/drivers/mtd/tests/mtd_subpagetest.c +++ b/drivers/mtd/tests/mtd_subpagetest.c @@ -28,6 +28,7 @@ #include <linux/mtd/mtd.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/random.h> static int dev = -EINVAL; module_param(dev, int, S_IRUGO); @@ -43,26 +44,7 @@ static int bufsize; static int ebcnt; static int pgcnt; static int errcnt; -static unsigned long next = 1; - -static inline unsigned int simple_rand(void) -{ - next = next * 1103515245 + 12345; - return (unsigned int)((next / 65536) % 32768); -} - -static inline void simple_srand(unsigned long seed) -{ - next = seed; -} - -static void set_random_data(unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - buf[i] = simple_rand(); -} +static struct rnd_state rnd_state; static inline void clear_data(unsigned char *buf, size_t len) { @@ -119,7 +101,7 @@ static int write_eraseblock(int ebnum) int err = 0; loff_t addr = ebnum * mtd->erasesize; - set_random_data(writebuf, subpgsize); + prandom_bytes_state(&rnd_state, writebuf, subpgsize); err = mtd_write(mtd, addr, subpgsize, &written, writebuf); if (unlikely(err || written != subpgsize)) { pr_err("error: write failed at %#llx\n", @@ -133,7 +115,7 @@ static int write_eraseblock(int ebnum) addr += subpgsize; - set_random_data(writebuf, subpgsize); + prandom_bytes_state(&rnd_state, writebuf, subpgsize); err = mtd_write(mtd, addr, subpgsize, &written, writebuf); if (unlikely(err || written != subpgsize)) { pr_err("error: write failed at %#llx\n", @@ -157,7 +139,7 @@ static int write_eraseblock2(int ebnum) for (k = 1; k < 33; ++k) { if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) break; - set_random_data(writebuf, subpgsize * k); + prandom_bytes_state(&rnd_state, writebuf, subpgsize * k); err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf); if (unlikely(err || written != subpgsize * k)) { pr_err("error: write failed at %#llx\n", @@ -193,7 +175,7 @@ static int verify_eraseblock(int ebnum) int err = 0; loff_t addr = ebnum * mtd->erasesize; - set_random_data(writebuf, subpgsize); + prandom_bytes_state(&rnd_state, writebuf, subpgsize); clear_data(readbuf, subpgsize); err = mtd_read(mtd, addr, subpgsize, &read, readbuf); if (unlikely(err || read != subpgsize)) { @@ -220,7 +202,7 @@ static int verify_eraseblock(int ebnum) addr += subpgsize; - set_random_data(writebuf, subpgsize); + prandom_bytes_state(&rnd_state, writebuf, subpgsize); clear_data(readbuf, subpgsize); err = mtd_read(mtd, addr, subpgsize, &read, readbuf); if (unlikely(err || read != subpgsize)) { @@ -257,7 +239,7 @@ static int verify_eraseblock2(int ebnum) for (k = 1; k < 33; ++k) { if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) break; - set_random_data(writebuf, subpgsize * k); + prandom_bytes_state(&rnd_state, writebuf, subpgsize * k); clear_data(readbuf, subpgsize * k); err = mtd_read(mtd, addr, subpgsize * k, &read, readbuf); if (unlikely(err || read != subpgsize * k)) { @@ -430,7 +412,7 @@ static int __init mtd_subpagetest_init(void) goto out; pr_info("writing whole device\n"); - simple_srand(1); + prandom_seed_state(&rnd_state, 1); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) continue; @@ -443,7 +425,7 @@ static int __init mtd_subpagetest_init(void) } pr_info("written %u eraseblocks\n", i); - simple_srand(1); + prandom_seed_state(&rnd_state, 1); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) @@ -466,7 +448,7 @@ static int __init mtd_subpagetest_init(void) goto out; /* Write all eraseblocks */ - simple_srand(3); + prandom_seed_state(&rnd_state, 3); pr_info("writing whole device\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) @@ -481,7 +463,7 @@ static int __init mtd_subpagetest_init(void) pr_info("written %u eraseblocks\n", i); /* Check all eraseblocks */ - simple_srand(3); + prandom_seed_state(&rnd_state, 3); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index f4d2e9e..c3f1afd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2197,13 +2197,13 @@ static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter, union ixgbe_atr_input *mask = &adapter->fdir_mask; struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *rule = NULL; /* report total rule count */ cmd->data = (1024 << adapter->fdir_pballoc) - 2; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { if (fsp->location <= rule->sw_idx) break; @@ -2264,14 +2264,14 @@ static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter, struct ethtool_rxnfc *cmd, u32 *rule_locs) { - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *rule; int cnt = 0; /* report total rule count */ cmd->data = (1024 << adapter->fdir_pballoc) - 2; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { if (cnt == cmd->rule_cnt) return -EMSGSIZE; @@ -2358,19 +2358,19 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, u16 sw_idx) { struct ixgbe_hw *hw = &adapter->hw; - struct hlist_node *node, *node2, *parent; - struct ixgbe_fdir_filter *rule; + struct hlist_node *node2; + struct ixgbe_fdir_filter *rule, *parent; int err = -EINVAL; parent = NULL; rule = NULL; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { /* hash found, or no matching entry */ if (rule->sw_idx >= sw_idx) break; - parent = node; + parent = rule; } /* if there is an old rule occupying our place remove it */ @@ -2399,7 +2399,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, /* add filter to the list */ if (parent) - hlist_add_after(parent, &input->fdir_node); + hlist_add_after(&parent->fdir_node, &input->fdir_node); else hlist_add_head(&input->fdir_node, &adapter->fdir_filter_list); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 68478d6..db5611a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3891,7 +3891,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *filter; spin_lock(&adapter->fdir_perfect_lock); @@ -3899,7 +3899,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) if (!hlist_empty(&adapter->fdir_filter_list)) ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask); - hlist_for_each_entry_safe(filter, node, node2, + hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { ixgbe_fdir_write_perfect_filter_82599(hw, &filter->filter, @@ -4356,12 +4356,12 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter) static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter) { - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *filter; spin_lock(&adapter->fdir_perfect_lock); - hlist_for_each_entry_safe(filter, node, node2, + hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { hlist_del(&filter->fdir_node); kfree(filter); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5385474..bb4d8d9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -225,11 +225,10 @@ static inline struct mlx4_en_filter * mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, __be16 src_port, __be16 dst_port) { - struct hlist_node *elem; struct mlx4_en_filter *filter; struct mlx4_en_filter *ret = NULL; - hlist_for_each_entry(filter, elem, + hlist_for_each_entry(filter, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port), filter_chain) { @@ -574,13 +573,13 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv) if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct hlist_head *bucket; unsigned int mac_hash; mac_hash = priv->dev->dev_addr[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, priv->dev->dev_addr)) { en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n", @@ -609,11 +608,11 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, struct hlist_head *bucket; unsigned int mac_hash; struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; u64 prev_mac_u64 = mlx4_en_mac_to_u64(prev_mac); bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, prev_mac)) { mlx4_en_uc_steer_release(priv, entry->mac, qpn, entry->reg_id); @@ -1019,7 +1018,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, { struct netdev_hw_addr *ha; struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; bool found; u64 mac; int err = 0; @@ -1035,7 +1034,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, /* find what to remove */ for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { bucket = &priv->mac_hash[i]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { found = false; netdev_for_each_uc_addr(ha, dev) { if (ether_addr_equal_64bits(entry->mac, @@ -1078,7 +1077,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, netdev_for_each_uc_addr(ha, dev) { found = false; bucket = &priv->mac_hash[ha->addr[MLX4_EN_MAC_HASH_IDX]]; - hlist_for_each_entry(entry, n, bucket, hlist) { + hlist_for_each_entry(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, ha->addr)) { found = true; break; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ce38654..c7f8563 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/mlx4/qp.h> #include <linux/skbuff.h> +#include <linux/rculist.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/vmalloc.h> @@ -617,7 +618,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (is_multicast_ether_addr(ethh->h_dest)) { struct mlx4_mac_entry *entry; - struct hlist_node *n; struct hlist_head *bucket; unsigned int mac_hash; @@ -625,7 +625,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; rcu_read_lock(); - hlist_for_each_entry_rcu(entry, n, bucket, hlist) { + hlist_for_each_entry_rcu(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, ethh->h_source)) { rcu_read_unlock(); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 325e11e..f89cc7a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -576,7 +576,7 @@ void qlcnic_free_mac_list(struct qlcnic_adapter *adapter) void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) { struct qlcnic_filter *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; int i; unsigned long time; @@ -584,7 +584,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) for (i = 0; i < adapter->fhash.fbucket_size; i++) { head = &(adapter->fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { cmd = tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL : QLCNIC_MAC_DEL; time = tmp_fil->ftime; @@ -604,7 +604,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) for (i = 0; i < adapter->rx_fhash.fbucket_size; i++) { head = &(adapter->rx_fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { time = tmp_fil->ftime; if (jiffies > (QLCNIC_FILTER_AGE * HZ + time)) { @@ -621,14 +621,14 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) void qlcnic_delete_lb_filters(struct qlcnic_adapter *adapter) { struct qlcnic_filter *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; int i; u8 cmd; for (i = 0; i < adapter->fhash.fbucket_size; i++) { head = &(adapter->fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { cmd = tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL : QLCNIC_MAC_DEL; qlcnic_sre_macaddr_change(adapter, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 6387e0c..0e63006 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -162,7 +162,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, { struct ethhdr *phdr = (struct ethhdr *)(skb->data); struct qlcnic_filter *fil, *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; unsigned long time; u64 src_addr = 0; @@ -179,7 +179,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, (adapter->fhash.fbucket_size - 1); head = &(adapter->rx_fhash.fhead[hindex]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { time = tmp_fil->ftime; @@ -205,7 +205,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, (adapter->fhash.fbucket_size - 1); head = &(adapter->rx_fhash.fhead[hindex]); spin_lock(&adapter->rx_mac_learn_lock); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { found = 1; @@ -272,7 +272,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb) { struct qlcnic_filter *fil, *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; struct net_device *netdev = adapter->netdev; struct ethhdr *phdr = (struct ethhdr *)(skb->data); @@ -294,7 +294,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, hindex = qlcnic_mac_hash(src_addr) & (adapter->fhash.fbucket_size - 1); head = &(adapter->fhash.fhead[hindex]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 289b4ee..1df0ff3 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -614,10 +614,9 @@ struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb) { unsigned int hash = vnet_hashfn(skb->data); struct hlist_head *hp = &vp->port_hash[hash]; - struct hlist_node *n; struct vnet_port *port; - hlist_for_each_entry(port, n, hp, hash) { + hlist_for_each_entry(port, hp, hash) { if (ether_addr_equal(port->raddr, skb->data)) return port; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index defcd8a8..417b2af 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -55,9 +55,8 @@ static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, const unsigned char *addr) { struct macvlan_dev *vlan; - struct hlist_node *n; - hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[addr[5]], hlist) { + hlist_for_each_entry_rcu(vlan, &port->vlan_hash[addr[5]], hlist) { if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr)) return vlan; } @@ -149,7 +148,6 @@ static void macvlan_broadcast(struct sk_buff *skb, { const struct ethhdr *eth = eth_hdr(skb); const struct macvlan_dev *vlan; - struct hlist_node *n; struct sk_buff *nskb; unsigned int i; int err; @@ -159,7 +157,7 @@ static void macvlan_broadcast(struct sk_buff *skb, return; for (i = 0; i < MACVLAN_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[i], hlist) { + hlist_for_each_entry_rcu(vlan, &port->vlan_hash[i], hlist) { if (vlan->dev == src || !(vlan->mode & mode)) continue; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 9724301..a449439 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -279,28 +279,17 @@ static int macvtap_receive(struct sk_buff *skb) static int macvtap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - int id; mutex_lock(&minor_lock); - if (idr_pre_get(&minor_idr, GFP_KERNEL) == 0) - goto exit; - - retval = idr_get_new_above(&minor_idr, vlan, 1, &id); - if (retval < 0) { - if (retval == -EAGAIN) - retval = -ENOMEM; - goto exit; - } - if (id < MACVTAP_NUM_DEVS) { - vlan->minor = id; - } else { + retval = idr_alloc(&minor_idr, vlan, 1, MACVTAP_NUM_DEVS, GFP_KERNEL); + if (retval >= 0) { + vlan->minor = retval; + } else if (retval == -ENOSPC) { printk(KERN_ERR "too many macvtap devices\n"); retval = -EINVAL; - idr_remove(&minor_idr, id); } -exit: mutex_unlock(&minor_lock); - return retval; + return retval < 0 ? retval : 0; } static void macvtap_free_minor(struct macvlan_dev *vlan) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 3db9131..72ff14b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2953,46 +2953,21 @@ static void __exit ppp_cleanup(void) * by holding all_ppp_mutex */ -static int __unit_alloc(struct idr *p, void *ptr, int n) -{ - int unit, err; - -again: - if (!idr_pre_get(p, GFP_KERNEL)) { - pr_err("PPP: No free memory for idr\n"); - return -ENOMEM; - } - - err = idr_get_new_above(p, ptr, n, &unit); - if (err < 0) { - if (err == -EAGAIN) - goto again; - return err; - } - - return unit; -} - /* associate pointer with specified number */ static int unit_set(struct idr *p, void *ptr, int n) { int unit; - unit = __unit_alloc(p, ptr, n); - if (unit < 0) - return unit; - else if (unit != n) { - idr_remove(p, unit); - return -EINVAL; - } - + unit = idr_alloc(p, ptr, n, n + 1, GFP_KERNEL); + if (unit == -ENOSPC) + unit = -EINVAL; return unit; } /* get new free unit number and associate pointer with it */ static int unit_get(struct idr *p, void *ptr) { - return __unit_alloc(p, ptr, 0); + return idr_alloc(p, ptr, 0, 0, GFP_KERNEL); } /* put unit number back to a pool */ diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b6f45c5..2c6a22e 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -197,9 +197,8 @@ static inline u32 tun_hashfn(u32 rxhash) static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) { struct tun_flow_entry *e; - struct hlist_node *n; - hlist_for_each_entry_rcu(e, n, head, hash_link) { + hlist_for_each_entry_rcu(e, head, hash_link) { if (e->rxhash == rxhash) return e; } @@ -241,9 +240,9 @@ static void tun_flow_flush(struct tun_struct *tun) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) tun_flow_delete(tun, e); } spin_unlock_bh(&tun->lock); @@ -256,9 +255,9 @@ static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) { + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { if (e->queue_index == queue_index) tun_flow_delete(tun, e); } @@ -279,9 +278,9 @@ static void tun_flow_cleanup(unsigned long data) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) { + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { unsigned long this_timer; count++; this_timer = e->updated + delay; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f736823..f10e58a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -145,9 +145,8 @@ static inline struct hlist_head *vni_head(struct net *net, u32 id) static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) { struct vxlan_dev *vxlan; - struct hlist_node *node; - hlist_for_each_entry_rcu(vxlan, node, vni_head(net, id), hlist) { + hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) { if (vxlan->vni == id) return vxlan; } @@ -292,9 +291,8 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, { struct hlist_head *head = vxlan_fdb_head(vxlan, mac); struct vxlan_fdb *f; - struct hlist_node *node; - hlist_for_each_entry_rcu(f, node, head, hlist) { + hlist_for_each_entry_rcu(f, head, hlist) { if (compare_ether_addr(mac, f->eth_addr) == 0) return f; } @@ -422,10 +420,9 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, for (h = 0; h < FDB_HASH_SIZE; ++h) { struct vxlan_fdb *f; - struct hlist_node *n; int err; - hlist_for_each_entry_rcu(f, n, &vxlan->fdb_head[h], hlist) { + hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { if (idx < cb->args[0]) goto skip; @@ -483,11 +480,10 @@ static bool vxlan_group_used(struct vxlan_net *vn, const struct vxlan_dev *this) { const struct vxlan_dev *vxlan; - struct hlist_node *node; unsigned h; for (h = 0; h < VNI_HASH_SIZE; ++h) - hlist_for_each_entry(vxlan, node, &vn->vni_list[h], hlist) { + hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) { if (vxlan == this) continue; diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c index 48273dd..4941f20 100644 --- a/drivers/net/wireless/zd1201.c +++ b/drivers/net/wireless/zd1201.c @@ -309,7 +309,6 @@ static void zd1201_usbrx(struct urb *urb) if (data[urb->actual_length-1] == ZD1201_PACKET_RXDATA) { int datalen = urb->actual_length-1; unsigned short len, fc, seq; - struct hlist_node *node; len = ntohs(*(__be16 *)&data[datalen-2]); if (len>datalen) @@ -362,7 +361,7 @@ static void zd1201_usbrx(struct urb *urb) hlist_add_head(&frag->fnode, &zd->fraglist); goto resubmit; } - hlist_for_each_entry(frag, node, &zd->fraglist, fnode) + hlist_for_each_entry(frag, &zd->fraglist, fnode) if (frag->seq == (seq&IEEE80211_SCTL_SEQ)) break; if (!frag) @@ -1831,14 +1830,14 @@ err_zd: static void zd1201_disconnect(struct usb_interface *interface) { struct zd1201 *zd = usb_get_intfdata(interface); - struct hlist_node *node, *node2; + struct hlist_node *node2; struct zd1201_frag *frag; if (!zd) return; usb_set_intfdata(interface, NULL); - hlist_for_each_entry_safe(frag, node, node2, &zd->fraglist, fnode) { + hlist_for_each_entry_safe(frag, node2, &zd->fraglist, fnode) { hlist_del_init(&frag->fnode); kfree_skb(frag->skb); kfree(frag); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 924e466..b099e00 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -842,9 +842,8 @@ static struct pci_cap_saved_state *pci_find_saved_cap( struct pci_dev *pci_dev, char cap) { struct pci_cap_saved_state *tmp; - struct hlist_node *pos; - hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { + hlist_for_each_entry(tmp, &pci_dev->saved_cap_space, next) { if (tmp->cap.cap_nr == cap) return tmp; } @@ -1041,7 +1040,6 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) struct pci_saved_state *state; struct pci_cap_saved_state *tmp; struct pci_cap_saved_data *cap; - struct hlist_node *pos; size_t size; if (!dev->state_saved) @@ -1049,7 +1047,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) size = sizeof(*state) + sizeof(struct pci_cap_saved_data); - hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) + hlist_for_each_entry(tmp, &dev->saved_cap_space, next) size += sizeof(struct pci_cap_saved_data) + tmp->cap.size; state = kzalloc(size, GFP_KERNEL); @@ -1060,7 +1058,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) sizeof(state->config_space)); cap = state->cap; - hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) { + hlist_for_each_entry(tmp, &dev->saved_cap_space, next) { size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size; memcpy(cap, &tmp->cap, len); cap = (struct pci_cap_saved_data *)((u8 *)cap + len); @@ -2038,9 +2036,9 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) void pci_free_cap_save_buffers(struct pci_dev *dev) { struct pci_cap_saved_state *tmp; - struct hlist_node *pos, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(tmp, pos, n, &dev->saved_cap_space, next) + hlist_for_each_entry_safe(tmp, n, &dev->saved_cap_space, next) kfree(tmp); } diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c index ca91396..0727f92 100644 --- a/drivers/power/bq2415x_charger.c +++ b/drivers/power/bq2415x_charger.c @@ -1515,16 +1515,11 @@ static int bq2415x_probe(struct i2c_client *client, } /* Get new ID for the new device */ - ret = idr_pre_get(&bq2415x_id, GFP_KERNEL); - if (ret == 0) - return -ENOMEM; - mutex_lock(&bq2415x_id_mutex); - ret = idr_get_new(&bq2415x_id, client, &num); + num = idr_alloc(&bq2415x_id, client, 0, 0, GFP_KERNEL); mutex_unlock(&bq2415x_id_mutex); - - if (ret < 0) - return ret; + if (num < 0) + return num; name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num); if (!name) { diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c index 8ccf5d7..26037ca 100644 --- a/drivers/power/bq27x00_battery.c +++ b/drivers/power/bq27x00_battery.c @@ -791,14 +791,11 @@ static int bq27x00_battery_probe(struct i2c_client *client, int retval = 0; /* Get new ID for the new battery device */ - retval = idr_pre_get(&battery_id, GFP_KERNEL); - if (retval == 0) - return -ENOMEM; mutex_lock(&battery_mutex); - retval = idr_get_new(&battery_id, client, &num); + num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL); mutex_unlock(&battery_mutex); - if (retval < 0) - return retval; + if (num < 0) + return num; name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num); if (!name) { diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c index e7301b3..c09e772 100644 --- a/drivers/power/ds2782_battery.c +++ b/drivers/power/ds2782_battery.c @@ -395,17 +395,12 @@ static int ds278x_battery_probe(struct i2c_client *client, } /* Get an ID for this battery */ - ret = idr_pre_get(&battery_id, GFP_KERNEL); - if (ret == 0) { - ret = -ENOMEM; - goto fail_id; - } - mutex_lock(&battery_lock); - ret = idr_get_new(&battery_id, client, &num); + ret = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL); mutex_unlock(&battery_lock); if (ret < 0) goto fail_id; + num = ret; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index 2bf0c1b..d3db26e 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c @@ -128,7 +128,8 @@ static int pps_gpio_probe(struct platform_device *pdev) } /* allocate space for device info */ - data = kzalloc(sizeof(struct pps_gpio_device_data), GFP_KERNEL); + data = devm_kzalloc(&pdev->dev, sizeof(struct pps_gpio_device_data), + GFP_KERNEL); if (data == NULL) { err = -ENOMEM; goto return_error; @@ -150,7 +151,6 @@ static int pps_gpio_probe(struct platform_device *pdev) pps_default_params |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR; data->pps = pps_register_source(&data->info, pps_default_params); if (data->pps == NULL) { - kfree(data); pr_err("failed to register IRQ %d as PPS source\n", irq); err = -EINVAL; goto return_error; @@ -164,7 +164,6 @@ static int pps_gpio_probe(struct platform_device *pdev) get_irqf_trigger_flags(pdata), data->info.name, data); if (ret) { pps_unregister_source(data->pps); - kfree(data); pr_err("failed to acquire IRQ %d\n", irq); err = -EINVAL; goto return_error; @@ -190,7 +189,6 @@ static int pps_gpio_remove(struct platform_device *pdev) gpio_free(pdata->gpio_pin); pps_unregister_source(data->pps); pr_info("removed IRQ %d as PPS source\n", data->irq); - kfree(data); return 0; } diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c index f197e8e..cdad4d9 100644 --- a/drivers/pps/kapi.c +++ b/drivers/pps/kapi.c @@ -102,7 +102,7 @@ struct pps_device *pps_register_source(struct pps_source_info *info, goto pps_register_source_exit; } - /* These initializations must be done before calling idr_get_new() + /* These initializations must be done before calling idr_alloc() * in order to avoid reces into pps_event(). */ pps->params.api_version = PPS_API_VERS; diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c index 6437703..7173e3a 100644 --- a/drivers/pps/pps.c +++ b/drivers/pps/pps.c @@ -295,29 +295,21 @@ int pps_register_cdev(struct pps_device *pps) dev_t devt; mutex_lock(&pps_idr_lock); - /* Get new ID for the new PPS source */ - if (idr_pre_get(&pps_idr, GFP_KERNEL) == 0) { - mutex_unlock(&pps_idr_lock); - return -ENOMEM; - } - - /* Now really allocate the PPS source. - * After idr_get_new() calling the new source will be freely available - * into the kernel. + /* + * Get new ID for the new PPS source. After idr_alloc() calling + * the new source will be freely available into the kernel. */ - err = idr_get_new(&pps_idr, pps, &pps->id); - mutex_unlock(&pps_idr_lock); - - if (err < 0) - return err; - - pps->id &= MAX_IDR_MASK; - if (pps->id >= PPS_MAX_SOURCES) { - pr_err("%s: too many PPS sources in the system\n", - pps->info.name); - err = -EBUSY; - goto free_idr; + err = idr_alloc(&pps_idr, pps, 0, PPS_MAX_SOURCES, GFP_KERNEL); + if (err < 0) { + if (err == -ENOSPC) { + pr_err("%s: too many PPS sources in the system\n", + pps->info.name); + err = -EBUSY; + } + goto out_unlock; } + pps->id = err; + mutex_unlock(&pps_idr_lock); devt = MKDEV(MAJOR(pps_devt), pps->id); @@ -351,8 +343,8 @@ del_cdev: free_idr: mutex_lock(&pps_idr_lock); idr_remove(&pps_idr, pps->id); +out_unlock: mutex_unlock(&pps_idr_lock); - return err; } diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index dd3bfaf..29387df 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -199,11 +199,6 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) /* actual size of vring (in bytes) */ size = PAGE_ALIGN(vring_size(rvring->len, rvring->align)); - if (!idr_pre_get(&rproc->notifyids, GFP_KERNEL)) { - dev_err(dev, "idr_pre_get failed\n"); - return -ENOMEM; - } - /* * Allocate non-cacheable memory for the vring. In the future * this call will also configure the IOMMU for us @@ -221,12 +216,13 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) * TODO: let the rproc know the notifyid of this vring * TODO: support predefined notifyids (via resource table) */ - ret = idr_get_new(&rproc->notifyids, rvring, ¬ifyid); + ret = idr_alloc(&rproc->notifyids, rvring, 0, 0, GFP_KERNEL); if (ret) { - dev_err(dev, "idr_get_new failed: %d\n", ret); + dev_err(dev, "idr_alloc failed: %d\n", ret); dma_free_coherent(dev->parent, size, va, dma); return ret; } + notifyid = ret; /* Store largest notifyid */ rproc->max_notifyid = max(rproc->max_notifyid, notifyid); @@ -1180,7 +1176,6 @@ static void rproc_type_release(struct device *dev) rproc_delete_debug_dir(rproc); - idr_remove_all(&rproc->notifyids); idr_destroy(&rproc->notifyids); if (rproc->index >= 0) diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index d854460..a59684b 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -213,13 +213,10 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp, struct rpmsg_channel *rpdev, rpmsg_rx_cb_t cb, void *priv, u32 addr) { - int err, tmpaddr, request; + int id_min, id_max, id; struct rpmsg_endpoint *ept; struct device *dev = rpdev ? &rpdev->dev : &vrp->vdev->dev; - if (!idr_pre_get(&vrp->endpoints, GFP_KERNEL)) - return NULL; - ept = kzalloc(sizeof(*ept), GFP_KERNEL); if (!ept) { dev_err(dev, "failed to kzalloc a new ept\n"); @@ -234,31 +231,28 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp, ept->priv = priv; /* do we need to allocate a local address ? */ - request = addr == RPMSG_ADDR_ANY ? RPMSG_RESERVED_ADDRESSES : addr; + if (addr == RPMSG_ADDR_ANY) { + id_min = RPMSG_RESERVED_ADDRESSES; + id_max = 0; + } else { + id_min = addr; + id_max = addr + 1; + } mutex_lock(&vrp->endpoints_lock); /* bind the endpoint to an rpmsg address (and allocate one if needed) */ - err = idr_get_new_above(&vrp->endpoints, ept, request, &tmpaddr); - if (err) { - dev_err(dev, "idr_get_new_above failed: %d\n", err); + id = idr_alloc(&vrp->endpoints, ept, id_min, id_max, GFP_KERNEL); + if (id < 0) { + dev_err(dev, "idr_alloc failed: %d\n", id); goto free_ept; } - - /* make sure the user's address request is fulfilled, if relevant */ - if (addr != RPMSG_ADDR_ANY && tmpaddr != addr) { - dev_err(dev, "address 0x%x already in use\n", addr); - goto rem_idr; - } - - ept->addr = tmpaddr; + ept->addr = id; mutex_unlock(&vrp->endpoints_lock); return ept; -rem_idr: - idr_remove(&vrp->endpoints, request); free_ept: mutex_unlock(&vrp->endpoints_lock); kref_put(&ept->refcount, __ept_release); @@ -1036,7 +1030,6 @@ static void rpmsg_remove(struct virtio_device *vdev) if (vrp->ns_ept) __rpmsg_destroy_ept(vrp, vrp->ns_ept); - idr_remove_all(&vrp->endpoints); idr_destroy(&vrp->endpoints); vdev->config->del_vqs(vrp->vdev); diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 742f5d7..a6f7190 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -12,13 +12,13 @@ *----------------------------------------------------------------------------*/ #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 29801 +# define AAC_DRIVER_BUILD 30000 # define AAC_DRIVER_BRANCH "-ms" #endif #define MAXIMUM_NUM_CONTAINERS 32 #define AAC_NUM_MGT_FIB 8 -#define AAC_NUM_IO_FIB (512 - AAC_NUM_MGT_FIB) +#define AAC_NUM_IO_FIB (1024 - AAC_NUM_MGT_FIB) #define AAC_NUM_FIB (AAC_NUM_IO_FIB + AAC_NUM_MGT_FIB) #define AAC_MAX_LUN (8) @@ -36,6 +36,10 @@ #define CONTAINER_TO_ID(cont) (cont) #define CONTAINER_TO_LUN(cont) (0) +#define PMC_DEVICE_S7 0x28c +#define PMC_DEVICE_S8 0x28d +#define PMC_DEVICE_S9 0x28f + #define aac_phys_to_logical(x) ((x)+1) #define aac_logical_to_phys(x) ((x)?(x)-1:0) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 8e5d3be..3f75995 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -404,7 +404,13 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) dev->max_fib_size = status[1] & 0xFFE0; host->sg_tablesize = status[2] >> 16; dev->sg_tablesize = status[2] & 0xFFFF; - host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; + if (dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8 || + dev->pdev->device == PMC_DEVICE_S9) + host->can_queue = ((status[3] >> 16) ? (status[3] >> 16) : + (status[3] & 0xFFFF)) - AAC_NUM_MGT_FIB; + else + host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; dev->max_num_aif = status[4] & 0xFFFF; /* * NOTE: @@ -452,6 +458,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) } } + if (host->can_queue > AAC_NUM_IO_FIB) + host->can_queue = AAC_NUM_IO_FIB; + /* * Ok now init the communication subsystem */ diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index e6bf126..a5f7690 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -1034,7 +1034,7 @@ bfad_start_ops(struct bfad_s *bfad) { sizeof(driver_info.host_os_patch) - 1); strncpy(driver_info.os_device_name, bfad->pci_name, - sizeof(driver_info.os_device_name - 1)); + sizeof(driver_info.os_device_name) - 1); /* FCS driver info init */ spin_lock_irqsave(&bfad->bfad_lock, flags); diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c index 8f92732..5864f98 100644 --- a/drivers/scsi/bfa/bfad_im.c +++ b/drivers/scsi/bfa/bfad_im.c @@ -523,20 +523,13 @@ bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port, int error = 1; mutex_lock(&bfad_mutex); - if (!idr_pre_get(&bfad_im_port_index, GFP_KERNEL)) { + error = idr_alloc(&bfad_im_port_index, im_port, 0, 0, GFP_KERNEL); + if (error < 0) { mutex_unlock(&bfad_mutex); - printk(KERN_WARNING "idr_pre_get failure\n"); + printk(KERN_WARNING "idr_alloc failure\n"); goto out; } - - error = idr_get_new(&bfad_im_port_index, im_port, - &im_port->idr_id); - if (error) { - mutex_unlock(&bfad_mutex); - printk(KERN_WARNING "idr_get_new failure\n"); - goto out; - } - + im_port->idr_id = error; mutex_unlock(&bfad_mutex); im_port->shost = bfad_scsi_host_alloc(im_port, bfad); diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 3486845..50fcd01 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -64,7 +64,7 @@ #include "bnx2fc_constants.h" #define BNX2FC_NAME "bnx2fc" -#define BNX2FC_VERSION "1.0.12" +#define BNX2FC_VERSION "1.0.13" #define PFX "bnx2fc: " @@ -156,6 +156,18 @@ #define BNX2FC_RELOGIN_WAIT_TIME 200 #define BNX2FC_RELOGIN_WAIT_CNT 10 +#define BNX2FC_STATS(hba, stat, cnt) \ + do { \ + u32 val; \ + \ + val = fw_stats->stat.cnt; \ + if (hba->prev_stats.stat.cnt <= val) \ + val -= hba->prev_stats.stat.cnt; \ + else \ + val += (0xfffffff - hba->prev_stats.stat.cnt); \ + hba->bfw_stats.cnt += val; \ + } while (0) + /* bnx2fc driver uses only one instance of fcoe_percpu_s */ extern struct fcoe_percpu_s bnx2fc_global; @@ -167,6 +179,14 @@ struct bnx2fc_percpu_s { spinlock_t fp_work_lock; }; +struct bnx2fc_fw_stats { + u64 fc_crc_cnt; + u64 fcoe_tx_pkt_cnt; + u64 fcoe_rx_pkt_cnt; + u64 fcoe_tx_byte_cnt; + u64 fcoe_rx_byte_cnt; +}; + struct bnx2fc_hba { struct list_head list; struct cnic_dev *cnic; @@ -207,6 +227,8 @@ struct bnx2fc_hba { struct bnx2fc_rport **tgt_ofld_list; /* statistics */ + struct bnx2fc_fw_stats bfw_stats; + struct fcoe_statistics_params prev_stats; struct fcoe_statistics_params *stats_buffer; dma_addr_t stats_buf_dma; struct completion stat_req_done; @@ -280,6 +302,7 @@ struct bnx2fc_rport { #define BNX2FC_FLAG_UPLD_REQ_COMPL 0x7 #define BNX2FC_FLAG_EXPL_LOGO 0x8 #define BNX2FC_FLAG_DISABLE_FAILED 0x9 +#define BNX2FC_FLAG_ENABLED 0xa u8 src_addr[ETH_ALEN]; u32 max_sqes; @@ -468,6 +491,8 @@ int bnx2fc_send_fw_fcoe_init_msg(struct bnx2fc_hba *hba); int bnx2fc_send_fw_fcoe_destroy_msg(struct bnx2fc_hba *hba); int bnx2fc_send_session_ofld_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); +int bnx2fc_send_session_enable_req(struct fcoe_port *port, + struct bnx2fc_rport *tgt); int bnx2fc_send_session_disable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); int bnx2fc_send_session_destroy_req(struct bnx2fc_hba *hba, diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 70ecd95..6401db4 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -22,7 +22,7 @@ DEFINE_PER_CPU(struct bnx2fc_percpu_s, bnx2fc_percpu); #define DRV_MODULE_NAME "bnx2fc" #define DRV_MODULE_VERSION BNX2FC_VERSION -#define DRV_MODULE_RELDATE "Jun 04, 2012" +#define DRV_MODULE_RELDATE "Dec 21, 2012" static char version[] = @@ -687,11 +687,16 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) BNX2FC_HBA_DBG(lport, "FW stat req timed out\n"); return bnx2fc_stats; } - bnx2fc_stats->invalid_crc_count += fw_stats->rx_stat2.fc_crc_cnt; - bnx2fc_stats->tx_frames += fw_stats->tx_stat.fcoe_tx_pkt_cnt; - bnx2fc_stats->tx_words += (fw_stats->tx_stat.fcoe_tx_byte_cnt) / 4; - bnx2fc_stats->rx_frames += fw_stats->rx_stat0.fcoe_rx_pkt_cnt; - bnx2fc_stats->rx_words += (fw_stats->rx_stat0.fcoe_rx_byte_cnt) / 4; + BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt); + bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_pkt_cnt); + bnx2fc_stats->tx_frames += hba->bfw_stats.fcoe_tx_pkt_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_byte_cnt); + bnx2fc_stats->tx_words += ((hba->bfw_stats.fcoe_tx_byte_cnt) / 4); + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_pkt_cnt); + bnx2fc_stats->rx_frames += hba->bfw_stats.fcoe_rx_pkt_cnt; + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_byte_cnt); + bnx2fc_stats->rx_words += ((hba->bfw_stats.fcoe_rx_byte_cnt) / 4); bnx2fc_stats->dumped_frames = 0; bnx2fc_stats->lip_count = 0; @@ -700,6 +705,8 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) bnx2fc_stats->loss_of_signal_count = 0; bnx2fc_stats->prim_seq_protocol_err_count = 0; + memcpy(&hba->prev_stats, hba->stats_buffer, + sizeof(struct fcoe_statistics_params)); return bnx2fc_stats; } @@ -2660,7 +2667,7 @@ static struct scsi_host_template bnx2fc_shost_template = { .can_queue = BNX2FC_CAN_QUEUE, .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = BNX2FC_MAX_BDS_PER_CMD, - .max_sectors = 512, + .max_sectors = 1024, }; static struct libfc_function_template bnx2fc_libfc_fcn_templ = { diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index ef60afa..85ea98a 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -347,7 +347,7 @@ int bnx2fc_send_session_ofld_req(struct fcoe_port *port, * @port: port structure pointer * @tgt: bnx2fc_rport structure pointer */ -static int bnx2fc_send_session_enable_req(struct fcoe_port *port, +int bnx2fc_send_session_enable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt) { struct kwqe *kwqe_arr[2]; @@ -759,8 +759,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe) case FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET: BNX2FC_TGT_DBG(tgt, "REC TOV popped for xid - 0x%x\n", xid); - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); if (!test_bit(BNX2FC_FLAG_SRR_SENT, @@ -847,8 +845,6 @@ ret_err_rqe: goto ret_warn_rqe; } - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); @@ -1124,7 +1120,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, struct bnx2fc_interface *interface; u32 conn_id; u32 context_id; - int rc; conn_id = ofld_kcqe->fcoe_conn_id; context_id = ofld_kcqe->fcoe_conn_context_id; @@ -1153,17 +1148,10 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, "resources\n"); set_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, &tgt->flags); } - goto ofld_cmpl_err; } else { - - /* now enable the session */ - rc = bnx2fc_send_session_enable_req(port, tgt); - if (rc) { - printk(KERN_ERR PFX "enable session failed\n"); - goto ofld_cmpl_err; - } + /* FW offload request successfully completed */ + set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); } - return; ofld_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); @@ -1210,15 +1198,9 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba, printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n"); goto enbl_cmpl_err; } - if (ofld_kcqe->completion_status) - goto enbl_cmpl_err; - else { + if (!ofld_kcqe->completion_status) /* enable successful - rport ready for issuing IOs */ - set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); - set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); - wake_up_interruptible(&tgt->ofld_wait); - } - return; + set_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); enbl_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); @@ -1251,6 +1233,7 @@ static void bnx2fc_process_conn_disable_cmpl(struct bnx2fc_hba *hba, /* disable successful */ BNX2FC_TGT_DBG(tgt, "disable successful\n"); clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_DISABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 8d4626c..60798e8 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -654,7 +654,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) mp_req->mp_resp_bd = dma_alloc_coherent(&hba->pcidev->dev, sz, &mp_req->mp_resp_bd_dma, GFP_ATOMIC); - if (!mp_req->mp_req_bd) { + if (!mp_req->mp_resp_bd) { printk(KERN_ERR PFX "unable to alloc MP resp bd\n"); bnx2fc_free_mp_resc(io_req); return FAILED; @@ -685,8 +685,8 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) { struct fc_lport *lport; - struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); - struct fc_rport_libfc_priv *rp = rport->dd_data; + struct fc_rport *rport; + struct fc_rport_libfc_priv *rp; struct fcoe_port *port; struct bnx2fc_interface *interface; struct bnx2fc_rport *tgt; @@ -704,6 +704,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) unsigned long start = jiffies; lport = shost_priv(host); + rport = starget_to_rport(scsi_target(sc_cmd->device)); port = lport_priv(lport); interface = port->priv; @@ -712,6 +713,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) rc = FAILED; goto tmf_err; } + rp = rport->dd_data; rc = fc_block_scsi_eh(sc_cmd); if (rc) diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index b9d0d9c..c57a3bb 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -33,6 +33,7 @@ static void bnx2fc_upld_timer(unsigned long data) BNX2FC_TGT_DBG(tgt, "upld_timer - Upload compl not received!!\n"); /* fake upload completion */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); } @@ -55,10 +56,25 @@ static void bnx2fc_ofld_timer(unsigned long data) * resources are freed up in bnx2fc_offload_session */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); } +static void bnx2fc_ofld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); + mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); + + wait_event_interruptible(tgt->ofld_wait, + (test_bit( + BNX2FC_FLAG_OFLD_REQ_CMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->ofld_timer); +} + static void bnx2fc_offload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt, struct fc_rport_priv *rdata) @@ -103,17 +119,7 @@ retry_ofld: * wait for the session is offloaded and enabled. 3 Secs * should be ample time for this process to complete. */ - setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); - mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->ofld_wait, - (test_bit( - BNX2FC_FLAG_OFLD_REQ_CMPL, - &tgt->flags))); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->ofld_timer); + bnx2fc_ofld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { if (test_and_clear_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, @@ -131,14 +137,23 @@ retry_ofld: } if (bnx2fc_map_doorbell(tgt)) { printk(KERN_ERR PFX "map doorbell failed - no mem\n"); - /* upload will take care of cleaning up sess resc */ - lport->tt.rport_logoff(rdata); + goto ofld_err; } + clear_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); + rval = bnx2fc_send_session_enable_req(port, tgt); + if (rval) { + pr_err(PFX "enable session failed\n"); + goto ofld_err; + } + bnx2fc_ofld_wait(tgt); + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) + goto ofld_err; return; ofld_err: /* couldn't offload the session. log off from this rport */ BNX2FC_TGT_DBG(tgt, "bnx2fc_offload_session - offload error\n"); + clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); /* Free session resources */ bnx2fc_free_session_resc(hba, tgt); tgt_init_err: @@ -259,6 +274,19 @@ void bnx2fc_flush_active_ios(struct bnx2fc_rport *tgt) spin_unlock_bh(&tgt->tgt_lock); } +static void bnx2fc_upld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); + mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); + wait_event_interruptible(tgt->upld_wait, + (test_bit( + BNX2FC_FLAG_UPLD_REQ_COMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->upld_timer); +} + static void bnx2fc_upload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt) { @@ -279,19 +307,8 @@ static void bnx2fc_upload_session(struct fcoe_port *port, * wait for upload to complete. 3 Secs * should be sufficient time for this process to complete. */ - setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - BNX2FC_TGT_DBG(tgt, "waiting for disable compl\n"); - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); - - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); + bnx2fc_upld_wait(tgt); /* * traverse thru the active_q and tmf_q and cleanup @@ -308,24 +325,13 @@ static void bnx2fc_upload_session(struct fcoe_port *port, bnx2fc_send_session_destroy_req(hba, tgt); /* wait for destroy to complete */ - setup_timer(&tgt->upld_timer, - bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); + bnx2fc_upld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_DESTROYED, &tgt->flags))) printk(KERN_ERR PFX "ERROR!! destroy timed out\n"); BNX2FC_TGT_DBG(tgt, "destroy wait complete flags = 0x%lx\n", tgt->flags); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); } else if (test_bit(BNX2FC_FLAG_DISABLE_FAILED, &tgt->flags)) { printk(KERN_ERR PFX "ERROR!! DISABLE req failed, destroy" @@ -381,7 +387,9 @@ static int bnx2fc_init_tgt(struct bnx2fc_rport *tgt, tgt->rq_cons_idx = 0; atomic_set(&tgt->num_active_ios, 0); - if (rdata->flags & FC_RP_FLAGS_RETRY) { + if (rdata->flags & FC_RP_FLAGS_RETRY && + rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET && + !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) { tgt->dev_type = TYPE_TAPE; tgt->io_timeout = 0; /* use default ULP timeout */ } else { @@ -479,7 +487,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, tgt = (struct bnx2fc_rport *)&rp[1]; /* This can happen when ADISC finds the same target */ - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { BNX2FC_TGT_DBG(tgt, "already offloaded\n"); mutex_unlock(&hba->hba_mutex); return; @@ -494,11 +502,8 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, BNX2FC_TGT_DBG(tgt, "OFFLOAD num_ofld_sess = %d\n", hba->num_ofld_sess); - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { - /* - * Session is offloaded and enabled. Map - * doorbell register for this target - */ + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { + /* Session is offloaded and enabled. */ BNX2FC_TGT_DBG(tgt, "sess offloaded\n"); /* This counter is protected with hba mutex */ hba->num_ofld_sess++; @@ -535,7 +540,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, */ tgt = (struct bnx2fc_rport *)&rp[1]; - if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) { mutex_unlock(&hba->hba_mutex); break; } diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 91eec60..a28b03e 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1317,7 +1317,7 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) (1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN)); if (error_mask1) { iscsi_init2.error_bit_map[0] = error_mask1; - mask64 &= (u32)(~mask64); + mask64 ^= (u32)(mask64); mask64 |= error_mask1; } else iscsi_init2.error_bit_map[0] = (u32) mask64; diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index a15474e..2a32374 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -895,7 +895,7 @@ static int ch_probe(struct device *dev) { struct scsi_device *sd = to_scsi_device(dev); struct device *class_dev; - int minor, ret = -ENOMEM; + int ret; scsi_changer *ch; if (sd->type != TYPE_MEDIUM_CHANGER) @@ -905,22 +905,19 @@ static int ch_probe(struct device *dev) if (NULL == ch) return -ENOMEM; - if (!idr_pre_get(&ch_index_idr, GFP_KERNEL)) - goto free_ch; - + idr_preload(GFP_KERNEL); spin_lock(&ch_index_lock); - ret = idr_get_new(&ch_index_idr, ch, &minor); + ret = idr_alloc(&ch_index_idr, ch, 0, CH_MAX_DEVS + 1, GFP_NOWAIT); spin_unlock(&ch_index_lock); + idr_preload_end(); - if (ret) + if (ret < 0) { + if (ret == -ENOSPC) + ret = -ENODEV; goto free_ch; - - if (minor > CH_MAX_DEVS) { - ret = -ENODEV; - goto remove_idr; } - ch->minor = minor; + ch->minor = ret; sprintf(ch->name,"ch%d",ch->minor); class_dev = device_create(ch_sysfs_class, dev, @@ -944,7 +941,7 @@ static int ch_probe(struct device *dev) return 0; remove_idr: - idr_remove(&ch_index_idr, minor); + idr_remove(&ch_index_idr, ch->minor); free_ch: kfree(ch); return ret; diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 8ecdb94..bdd78fb 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -2131,13 +2131,16 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) value_to_add = 4 - (cf->size % 4); cfg_data = kzalloc(cf->size+value_to_add, GFP_KERNEL); - if (cfg_data == NULL) - return -ENOMEM; + if (cfg_data == NULL) { + ret = -ENOMEM; + goto leave; + } memcpy((void *)cfg_data, (const void *)cf->data, cf->size); - - if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) - return -EINVAL; + if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) { + ret = -EINVAL; + goto leave; + } mtype = FW_PARAMS_PARAM_Y_GET(*fw_cfg_param); maddr = FW_PARAMS_PARAM_Z_GET(*fw_cfg_param) << 16; @@ -2149,9 +2152,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) strncpy(path, "/lib/firmware/" CSIO_CF_FNAME, 64); } +leave: kfree(cfg_data); release_firmware(cf); - return ret; } diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index c323b20..0604b5f 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -60,13 +60,6 @@ static struct scsi_transport_template *csio_fcoe_transport_vport; /* * debugfs support */ -static int -csio_mem_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -110,7 +103,7 @@ csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static const struct file_operations csio_mem_debugfs_fops = { .owner = THIS_MODULE, - .open = csio_mem_open, + .open = simple_open, .read = csio_mem_read, .llseek = default_llseek, }; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index f924b3c..3fecf35 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1564,6 +1564,7 @@ static int t4_uld_state_change(void *handle, enum cxgb4_state state) break; case CXGB4_STATE_DETACH: pr_info("cdev 0x%p, DETACH.\n", cdev); + cxgbi_device_unregister(cdev); break; default: pr_info("cdev 0x%p, unknown state %d.\n", cdev, state); diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 3c53c34..483eb9d 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -495,7 +495,8 @@ void fnic_eth_send(struct fcoe_ctlr *fip, struct sk_buff *skb) } fnic_queue_wq_eth_desc(wq, skb, pa, skb->len, - fnic->vlan_hw_insert, fnic->vlan_id, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1); spin_unlock_irqrestore(&fnic->wq_lock[0], flags); } @@ -563,7 +564,8 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp) } fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp), - fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1, 1, 1); fnic_send_frame_end: spin_unlock_irqrestore(&fnic->wq_lock[0], flags); diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 599790e..59bceac 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -1107,14 +1107,8 @@ static int gdth_init_pci(struct pci_dev *pdev, gdth_pci_str *pcistr, pci_read_config_word(pdev, PCI_COMMAND, &command); command |= 6; pci_write_config_word(pdev, PCI_COMMAND, command); - if (pci_resource_start(pdev, 8) == 1UL) - pci_resource_start(pdev, 8) = 0UL; - i = 0xFEFF0001UL; - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, i); - gdth_delay(1); - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, - pci_resource_start(pdev, 8)); - + gdth_delay(1); + dp6m_ptr = ha->brd; /* Ensure that it is safe to access the non HW portions of DPMEM. diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1d7da3f..8fa79b8 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -98,6 +98,7 @@ static unsigned int ipr_transop_timeout = 0; static unsigned int ipr_debug = 0; static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS; static unsigned int ipr_dual_ioa_raid = 1; +static unsigned int ipr_number_of_msix = 2; static DEFINE_SPINLOCK(ipr_driver_lock); /* This table describes the differences between DMA controller chips */ @@ -107,6 +108,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x0022C, .clr_interrupt_mask_reg = 0x00230, @@ -131,6 +133,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x00288, .clr_interrupt_mask_reg = 0x0028C, @@ -155,6 +158,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 1000, .cache_line_size = 0x20, .clear_isr = 0, + .iopoll_weight = 64, { .set_interrupt_mask_reg = 0x00010, .clr_interrupt_mask_reg = 0x00018, @@ -215,6 +219,8 @@ MODULE_PARM_DESC(dual_ioa_raid, "Enable dual adapter RAID support. Set to 1 to e module_param_named(max_devs, ipr_max_devs, int, 0); MODULE_PARM_DESC(max_devs, "Specify the maximum number of physical devices. " "[Default=" __stringify(IPR_DEFAULT_SIS64_DEVS) "]"); +module_param_named(number_of_msix, ipr_number_of_msix, int, 0); +MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 5). (default:2)"); MODULE_LICENSE("GPL"); MODULE_VERSION(IPR_DRIVER_VERSION); @@ -549,7 +555,8 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, struct ipr_trace_entry *trace_entry; struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - trace_entry = &ioa_cfg->trace[ioa_cfg->trace_index++]; + trace_entry = &ioa_cfg->trace[atomic_add_return + (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES]; trace_entry->time = jiffies; trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0]; trace_entry->type = type; @@ -560,6 +567,7 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, trace_entry->cmd_index = ipr_cmd->cmd_index & 0xff; trace_entry->res_handle = ipr_cmd->ioarcb.res_handle; trace_entry->u.add_data = add_data; + wmb(); } #else #define ipr_trc_hook(ipr_cmd, type, add_data) do { } while (0) @@ -595,8 +603,11 @@ static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd) struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa; struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64; dma_addr_t dma_addr = ipr_cmd->dma_addr; + int hrrq_id; + hrrq_id = ioarcb->cmd_pkt.hrrq_id; memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt)); + ioarcb->cmd_pkt.hrrq_id = hrrq_id; ioarcb->data_transfer_length = 0; ioarcb->read_data_transfer_length = 0; ioarcb->ioadl_len = 0; @@ -646,12 +657,16 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd, * pointer to ipr command struct **/ static -struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) +struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_hrr_queue *hrrq) { - struct ipr_cmnd *ipr_cmd; + struct ipr_cmnd *ipr_cmd = NULL; + + if (likely(!list_empty(&hrrq->hrrq_free_q))) { + ipr_cmd = list_entry(hrrq->hrrq_free_q.next, + struct ipr_cmnd, queue); + list_del(&ipr_cmd->queue); + } - ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue); - list_del(&ipr_cmd->queue); return ipr_cmd; } @@ -666,7 +681,8 @@ struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) static struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) { - struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); + struct ipr_cmnd *ipr_cmd = + __ipr_get_free_ipr_cmnd(&ioa_cfg->hrrq[IPR_INIT_HRRQ]); ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); return ipr_cmd; } @@ -686,9 +702,15 @@ static void ipr_mask_and_clear_interrupts(struct ipr_ioa_cfg *ioa_cfg, u32 clr_ints) { volatile u32 int_reg; + int i; /* Stop new interrupts */ - ioa_cfg->allow_interrupts = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); /* Set interrupt mask to stop all new interrupts */ if (ioa_cfg->sis64) @@ -761,13 +783,12 @@ static int ipr_set_pcix_cmd_reg(struct ipr_ioa_cfg *ioa_cfg) **/ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ata_queued_cmd *qc = ipr_cmd->qc; struct ipr_sata_port *sata_port = qc->ap->private_data; qc->err_mask |= AC_ERR_OTHER; sata_port->ioasa.status |= ATA_BUSY; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); ata_qc_complete(qc); } @@ -783,14 +804,13 @@ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) **/ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; scsi_cmd->result |= (DID_ERROR << 16); scsi_dma_unmap(ipr_cmd->scsi_cmd); scsi_cmd->scsi_done(scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -805,24 +825,32 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg) { struct ipr_cmnd *ipr_cmd, *temp; + struct ipr_hrr_queue *hrrq; ENTER; - list_for_each_entry_safe(ipr_cmd, temp, &ioa_cfg->pending_q, queue) { - list_del(&ipr_cmd->queue); + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry_safe(ipr_cmd, + temp, &hrrq->hrrq_pending_q, queue) { + list_del(&ipr_cmd->queue); - ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); - ipr_cmd->s.ioasa.hdr.ilid = cpu_to_be32(IPR_DRIVER_ILID); + ipr_cmd->s.ioasa.hdr.ioasc = + cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); + ipr_cmd->s.ioasa.hdr.ilid = + cpu_to_be32(IPR_DRIVER_ILID); - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - else if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + else if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, IPR_IOASC_IOA_WAS_RESET); - del_timer(&ipr_cmd->timer); - ipr_cmd->done(ipr_cmd); + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, + IPR_IOASC_IOA_WAS_RESET); + del_timer(&ipr_cmd->timer); + ipr_cmd->done(ipr_cmd); + } + spin_unlock(&hrrq->_lock); } - LEAVE; } @@ -872,9 +900,7 @@ static void ipr_do_req(struct ipr_cmnd *ipr_cmd, void (*done) (struct ipr_cmnd *), void (*timeout_func) (struct ipr_cmnd *), u32 timeout) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = done; @@ -975,6 +1001,14 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd, spin_lock_irq(ioa_cfg->host->host_lock); } +static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg) +{ + if (ioa_cfg->hrrq_num == 1) + return 0; + else + return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1; +} + /** * ipr_send_hcam - Send an HCAM to the adapter. * @ioa_cfg: ioa config struct @@ -994,9 +1028,9 @@ static void ipr_send_hcam(struct ipr_ioa_cfg *ioa_cfg, u8 type, struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; - if (ioa_cfg->allow_cmds) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_pending_q); ipr_cmd->u.hostrcb = hostrcb; @@ -1166,14 +1200,15 @@ static int ipr_is_same_device(struct ipr_resource_entry *res, } /** - * ipr_format_res_path - Format the resource path for printing. + * __ipr_format_res_path - Format the resource path for printing. * @res_path: resource path * @buf: buffer + * @len: length of buffer provided * * Return value: * pointer to buffer **/ -static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) +static char *__ipr_format_res_path(u8 *res_path, char *buffer, int len) { int i; char *p = buffer; @@ -1187,6 +1222,27 @@ static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) } /** + * ipr_format_res_path - Format the resource path for printing. + * @ioa_cfg: ioa config struct + * @res_path: resource path + * @buf: buffer + * @len: length of buffer provided + * + * Return value: + * pointer to buffer + **/ +static char *ipr_format_res_path(struct ipr_ioa_cfg *ioa_cfg, + u8 *res_path, char *buffer, int len) +{ + char *p = buffer; + + *p = '\0'; + p += snprintf(p, buffer + len - p, "%d/", ioa_cfg->host->host_no); + __ipr_format_res_path(res_path, p, len - (buffer - p)); + return buffer; +} + +/** * ipr_update_res_entry - Update the resource entry. * @res: resource entry struct * @cfgtew: config table entry wrapper struct @@ -1226,8 +1282,8 @@ static void ipr_update_res_entry(struct ipr_resource_entry *res, if (res->sdev && new_path) sdev_printk(KERN_INFO, res->sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(res->ioa_cfg, + res->res_path, buffer, sizeof(buffer))); } else { res->flags = cfgtew->u.cfgte->flags; if (res->flags & IPR_IS_IOA_RESOURCE) @@ -1363,7 +1419,7 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd) u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ioasc) { if (ioasc != IPR_IOASC_IOA_WAS_RESET) @@ -1613,8 +1669,8 @@ static void ipr_log_sis64_config_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err_separator; ipr_err("Device %d : %s", i + 1, - ipr_format_res_path(dev_entry->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(dev_entry->res_path, + buffer, sizeof(buffer))); ipr_log_ext_vpd(&dev_entry->vpd); ipr_err("-----New Device Information-----\n"); @@ -1960,14 +2016,16 @@ static void ipr_log64_fabric_path(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s\n", path_active_desc[i].desc, path_state_desc[j].desc, - ipr_format_res_path(fabric->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, + fabric->res_path, + buffer, sizeof(buffer))); return; } } ipr_err("Path state=%02X Resource Path=%s\n", path_state, - ipr_format_res_path(fabric->res_path, buffer, sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, fabric->res_path, + buffer, sizeof(buffer))); } static const struct { @@ -2108,18 +2166,20 @@ static void ipr_log64_path_elem(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s, Link rate=%s, WWN=%08X%08X\n", path_status_desc[j].desc, path_type_desc[i].desc, - ipr_format_res_path(cfg->res_path, buffer, - sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), + be32_to_cpu(cfg->wwid[1])); return; } } ipr_hcam_err(hostrcb, "Path element=%02X: Resource Path=%s, Link rate=%s " "WWN=%08X%08X\n", cfg->type_status, - ipr_format_res_path(cfg->res_path, buffer, sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); } /** @@ -2182,7 +2242,8 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("RAID %s Array Configuration: %s\n", error->protection_level, - ipr_format_res_path(error->last_res_path, buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, error->last_res_path, + buffer, sizeof(buffer))); ipr_err_separator; @@ -2203,11 +2264,12 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("Array Member %d:\n", i); ipr_log_ext_vpd(&array_entry->vpd); ipr_err("Current Location: %s\n", - ipr_format_res_path(array_entry->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, array_entry->res_path, + buffer, sizeof(buffer))); ipr_err("Expected Location: %s\n", - ipr_format_res_path(array_entry->expected_res_path, - buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + array_entry->expected_res_path, + buffer, sizeof(buffer))); ipr_err_separator; } @@ -2409,7 +2471,7 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd) fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error.fd_ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (!ioasc) { ipr_handle_log_data(ioa_cfg, hostrcb); @@ -2491,36 +2553,6 @@ static void ipr_oper_timeout(struct ipr_cmnd *ipr_cmd) } /** - * ipr_reset_reload - Reset/Reload the IOA - * @ioa_cfg: ioa config struct - * @shutdown_type: shutdown type - * - * This function resets the adapter and re-initializes it. - * This function assumes that all new host commands have been stopped. - * Return value: - * SUCCESS / FAILED - **/ -static int ipr_reset_reload(struct ipr_ioa_cfg *ioa_cfg, - enum ipr_shutdown_type shutdown_type) -{ - if (!ioa_cfg->in_reset_reload) - ipr_initiate_ioa_reset(ioa_cfg, shutdown_type); - - spin_unlock_irq(ioa_cfg->host->host_lock); - wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); - spin_lock_irq(ioa_cfg->host->host_lock); - - /* If we got hit with a host reset while we were already resetting - the adapter for some reason, and the reset failed. */ - if (ioa_cfg->ioa_is_dead) { - ipr_trace; - return FAILED; - } - - return SUCCESS; -} - -/** * ipr_find_ses_entry - Find matching SES in SES table * @res: resource entry struct of SES * @@ -3153,7 +3185,8 @@ static void ipr_worker_thread(struct work_struct *work) restart: do { did_work = 0; - if (!ioa_cfg->allow_cmds || !ioa_cfg->allow_ml_add_del) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds || + !ioa_cfg->allow_ml_add_del) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); return; } @@ -3401,7 +3434,7 @@ static ssize_t ipr_show_adapter_state(struct device *dev, int len; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) len = snprintf(buf, PAGE_SIZE, "offline\n"); else len = snprintf(buf, PAGE_SIZE, "online\n"); @@ -3427,14 +3460,20 @@ static ssize_t ipr_store_adapter_state(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; unsigned long lock_flags; - int result = count; + int result = count, i; if (!capable(CAP_SYS_ADMIN)) return -EACCES; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead && !strncmp(buf, "online", 6)) { - ioa_cfg->ioa_is_dead = 0; + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && + !strncmp(buf, "online", 6)) { + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ioa_cfg->reset_retries = 0; ioa_cfg->in_ioa_bringdown = 0; ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); @@ -3494,6 +3533,95 @@ static struct device_attribute ipr_ioa_reset_attr = { .store = ipr_store_reset_adapter }; +static int ipr_iopoll(struct blk_iopoll *iop, int budget); + /** + * ipr_show_iopoll_weight - Show ipr polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_show_iopoll_weight(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long lock_flags = 0; + int len; + + spin_lock_irqsave(shost->host_lock, lock_flags); + len = snprintf(buf, PAGE_SIZE, "%d\n", ioa_cfg->iopoll_weight); + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return len; +} + +/** + * ipr_store_iopoll_weight - Change the adapter's polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_store_iopoll_weight(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long user_iopoll_weight; + unsigned long lock_flags = 0; + int i; + + if (!ioa_cfg->sis64) { + dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); + return -EINVAL; + } + if (kstrtoul(buf, 10, &user_iopoll_weight)) + return -EINVAL; + + if (user_iopoll_weight > 256) { + dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); + return -EINVAL; + } + + if (user_iopoll_weight == ioa_cfg->iopoll_weight) { + dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); + return strlen(buf); + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + + spin_lock_irqsave(shost->host_lock, lock_flags); + ioa_cfg->iopoll_weight = user_iopoll_weight; + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return strlen(buf); +} + +static struct device_attribute ipr_iopoll_weight_attr = { + .attr = { + .name = "iopoll_weight", + .mode = S_IRUGO | S_IWUSR, + }, + .show = ipr_show_iopoll_weight, + .store = ipr_store_iopoll_weight +}; + /** * ipr_alloc_ucode_buffer - Allocates a microcode download buffer * @buf_len: buffer length @@ -3862,6 +3990,7 @@ static struct device_attribute *ipr_ioa_attrs[] = { &ipr_ioa_reset_attr, &ipr_update_fw_attr, &ipr_ioa_fw_type_attr, + &ipr_iopoll_weight_attr, NULL, }; @@ -4014,7 +4143,7 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg) ioa_cfg->dump = dump; ioa_cfg->sdt_state = WAIT_FOR_DUMP; - if (ioa_cfg->ioa_is_dead && !ioa_cfg->dump_taken) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && !ioa_cfg->dump_taken) { ioa_cfg->dump_taken = 1; schedule_work(&ioa_cfg->work_q); } @@ -4227,8 +4356,8 @@ static ssize_t ipr_show_resource_path(struct device *dev, struct device_attribut res = (struct ipr_resource_entry *)sdev->hostdata; if (res && ioa_cfg->sis64) len = snprintf(buf, PAGE_SIZE, "%s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(res->res_path, buffer, + sizeof(buffer))); else if (res) len = snprintf(buf, PAGE_SIZE, "%d:%d:%d:%d\n", ioa_cfg->host->host_no, res->bus, res->target, res->lun); @@ -4556,8 +4685,8 @@ static int ipr_slave_configure(struct scsi_device *sdev) scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); if (ioa_cfg->sis64) sdev_printk(KERN_INFO, sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + res->res_path, buffer, sizeof(buffer))); return 0; } spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); @@ -4638,22 +4767,18 @@ static int ipr_slave_alloc(struct scsi_device *sdev) return rc; } -/** - * ipr_eh_host_reset - Reset the host adapter - * @scsi_cmd: scsi command struct - * - * Return value: - * SUCCESS / FAILED - **/ -static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) +static int ipr_eh_host_reset(struct scsi_cmnd *cmd) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + unsigned long lock_flags = 0; + int rc = SUCCESS; ENTER; - ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; + ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->in_reset_reload) { + ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_ABBREV); dev_err(&ioa_cfg->pdev->dev, "Adapter being reset as a result of error recovery.\n"); @@ -4661,20 +4786,19 @@ static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) ioa_cfg->sdt_state = GET_DUMP; } - rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); - - LEAVE; - return rc; -} - -static int ipr_eh_host_reset(struct scsi_cmnd *cmd) -{ - int rc; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - spin_lock_irq(cmd->device->host->host_lock); - rc = __ipr_eh_host_reset(cmd); - spin_unlock_irq(cmd->device->host->host_lock); + /* If we got hit with a host reset while we were already resetting + the adapter for some reason, and the reset failed. */ + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { + ipr_trace; + rc = FAILED; + } + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + LEAVE; return rc; } @@ -4723,7 +4847,7 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg, ipr_send_blocking_cmd(ipr_cmd, ipr_timeout, IPR_DEVICE_RESET_TIMEOUT); ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET) { if (ipr_cmd->ioa_cfg->sis64) memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, @@ -4793,6 +4917,7 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) struct ipr_resource_entry *res; struct ata_port *ap; int rc = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; @@ -4808,22 +4933,26 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) */ if (ioa_cfg->in_reset_reload) return FAILED; - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; - if (ipr_cmd->qc && !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { - ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; - ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == res->res_handle) { + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->qc && + !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { + ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; + ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + } } } + spin_unlock(&hrrq->_lock); } - res->resetting_device = 1; scmd_printk(KERN_ERR, scsi_cmd, "Resetting device\n"); @@ -4833,11 +4962,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) ata_std_error_handler(ap); spin_lock_irq(scsi_cmd->device->host->host_lock); - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - rc = -EIO; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, + &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == + res->res_handle) { + rc = -EIO; + break; + } } + spin_unlock(&hrrq->_lock); } } else rc = ipr_device_reset(ioa_cfg, res); @@ -4890,7 +5025,7 @@ static void ipr_bus_reset_done(struct ipr_cmnd *ipr_cmd) else ipr_cmd->sibling->done(ipr_cmd->sibling); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); LEAVE; } @@ -4951,6 +5086,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) struct ipr_cmd_pkt *cmd_pkt; u32 ioasc, int_reg; int op_found = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata; @@ -4960,7 +5096,8 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) * This will force the mid-layer to call ipr_eh_host_reset, * which will then go to sleep and wait for the reset to complete */ - if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) + if (ioa_cfg->in_reset_reload || + ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; if (!res) return FAILED; @@ -4975,12 +5112,16 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) if (!ipr_is_gscsi(res)) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->scsi_cmd == scsi_cmd) { - ipr_cmd->done = ipr_scsi_eh_done; - op_found = 1; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->scsi_cmd == scsi_cmd) { + ipr_cmd->done = ipr_scsi_eh_done; + op_found = 1; + break; + } } + spin_unlock(&hrrq->_lock); } if (!op_found) @@ -5007,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) ipr_trace; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); if (!ipr_is_naca_model(res)) res->needs_sync_complete = 1; @@ -5099,6 +5240,9 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, } else { if (int_reg & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + else if (int_reg & IPR_PCII_NO_HOST_RRQ) + dev_err(&ioa_cfg->pdev->dev, + "No Host RRQ. 0x%08X\n", int_reg); else dev_err(&ioa_cfg->pdev->dev, "Permanent IOA failure. 0x%08X\n", int_reg); @@ -5121,10 +5265,10 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, * Return value: * none **/ -static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) +static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg, u16 number) { ioa_cfg->errors_logged++; - dev_err(&ioa_cfg->pdev->dev, "%s\n", msg); + dev_err(&ioa_cfg->pdev->dev, "%s %d\n", msg, number); if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) ioa_cfg->sdt_state = GET_DUMP; @@ -5132,6 +5276,83 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); } +static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget, + struct list_head *doneq) +{ + u32 ioasc; + u16 cmd_index; + struct ipr_cmnd *ipr_cmd; + struct ipr_ioa_cfg *ioa_cfg = hrr_queue->ioa_cfg; + int num_hrrq = 0; + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrr_queue->allow_interrupts) + return 0; + + while ((be32_to_cpu(*hrr_queue->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrr_queue->toggle_bit) { + + cmd_index = (be32_to_cpu(*hrr_queue->hrrq_curr) & + IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> + IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; + + if (unlikely(cmd_index > hrr_queue->max_cmd_id || + cmd_index < hrr_queue->min_cmd_id)) { + ipr_isr_eh(ioa_cfg, + "Invalid response handle from IOA: ", + cmd_index); + break; + } + + ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; + ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); + + list_move_tail(&ipr_cmd->queue, doneq); + + if (hrr_queue->hrrq_curr < hrr_queue->hrrq_end) { + hrr_queue->hrrq_curr++; + } else { + hrr_queue->hrrq_curr = hrr_queue->hrrq_start; + hrr_queue->toggle_bit ^= 1u; + } + num_hrrq++; + if (budget > 0 && num_hrrq >= budget) + break; + } + + return num_hrrq; +} + +static int ipr_iopoll(struct blk_iopoll *iop, int budget) +{ + struct ipr_ioa_cfg *ioa_cfg; + struct ipr_hrr_queue *hrrq; + struct ipr_cmnd *ipr_cmd, *temp; + unsigned long hrrq_flags; + int completed_ops; + LIST_HEAD(doneq); + + hrrq = container_of(iop, struct ipr_hrr_queue, iopoll); + ioa_cfg = hrrq->ioa_cfg; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); + + if (completed_ops < budget) + blk_iopoll_complete(iop); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } + + return completed_ops; +} + /** * ipr_isr - Interrupt service routine * @irq: irq number @@ -5142,78 +5363,48 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) **/ static irqreturn_t ipr_isr(int irq, void *devp) { - struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp; - unsigned long lock_flags = 0; + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; u32 int_reg = 0; - u32 ioasc; - u16 cmd_index; int num_hrrq = 0; int irq_none = 0; struct ipr_cmnd *ipr_cmd, *temp; irqreturn_t rc = IRQ_NONE; LIST_HEAD(doneq); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* If interrupts are disabled, ignore the interrupt */ - if (!ioa_cfg->allow_interrupts) { - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_NONE; } while (1) { - ipr_cmd = NULL; - - while ((be32_to_cpu(*ioa_cfg->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == - ioa_cfg->toggle_bit) { - - cmd_index = (be32_to_cpu(*ioa_cfg->hrrq_curr) & - IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; - - if (unlikely(cmd_index >= IPR_NUM_CMD_BLKS)) { - ipr_isr_eh(ioa_cfg, "Invalid response handle from IOA"); - rc = IRQ_HANDLED; - goto unlock_out; - } - - ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; - - ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + if (ipr_process_hrrq(hrrq, -1, &doneq)) { + rc = IRQ_HANDLED; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); - - list_move_tail(&ipr_cmd->queue, &doneq); - - rc = IRQ_HANDLED; - - if (ioa_cfg->hrrq_curr < ioa_cfg->hrrq_end) { - ioa_cfg->hrrq_curr++; - } else { - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit ^= 1u; - } - } - - if (ipr_cmd && !ioa_cfg->clear_isr) - break; + if (!ioa_cfg->clear_isr) + break; - if (ipr_cmd != NULL) { /* Clear the PCI interrupt */ num_hrrq = 0; do { - writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32); + writel(IPR_PCII_HRRQ_UPDATED, + ioa_cfg->regs.clr_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); } while (int_reg & IPR_PCII_HRRQ_UPDATED && - num_hrrq++ < IPR_MAX_HRRQ_RETRIES); + num_hrrq++ < IPR_MAX_HRRQ_RETRIES); } else if (rc == IRQ_NONE && irq_none == 0) { int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); irq_none++; } else if (num_hrrq == IPR_MAX_HRRQ_RETRIES && int_reg & IPR_PCII_HRRQ_UPDATED) { - ipr_isr_eh(ioa_cfg, "Error clearing HRRQ"); + ipr_isr_eh(ioa_cfg, + "Error clearing HRRQ: ", num_hrrq); rc = IRQ_HANDLED; - goto unlock_out; + break; } else break; } @@ -5221,14 +5412,64 @@ static irqreturn_t ipr_isr(int irq, void *devp) if (unlikely(rc == IRQ_NONE)) rc = ipr_handle_other_interrupt(ioa_cfg, int_reg); -unlock_out: - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { list_del(&ipr_cmd->queue); del_timer(&ipr_cmd->timer); ipr_cmd->fast_done(ipr_cmd); } + return rc; +} + +/** + * ipr_isr_mhrrq - Interrupt service routine + * @irq: irq number + * @devp: pointer to ioa config struct + * + * Return value: + * IRQ_NONE / IRQ_HANDLED + **/ +static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) +{ + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; + struct ipr_cmnd *ipr_cmd, *temp; + irqreturn_t rc = IRQ_NONE; + LIST_HEAD(doneq); + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_NONE; + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) { + if (!blk_iopoll_sched_prep(&hrrq->iopoll)) + blk_iopoll_sched(&hrrq->iopoll); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_HANDLED; + } + } else { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) + + if (ipr_process_hrrq(hrrq, -1, &doneq)) + rc = IRQ_HANDLED; + } + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } return rc; } @@ -5388,7 +5629,6 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) { struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; struct ipr_resource_entry *res = scsi_cmd->device->hostdata; - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); if (IPR_IOASC_SENSE_KEY(ioasc) > 0) { @@ -5406,7 +5646,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) res->in_erp = 0; } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5790,7 +6030,7 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5809,21 +6049,21 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - unsigned long lock_flags; + unsigned long hrrq_flags; scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { scsi_dma_unmap(scsi_cmd); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } else { - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); ipr_erp_start(ioa_cfg, ipr_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } } @@ -5846,22 +6086,34 @@ static int ipr_queuecommand(struct Scsi_Host *shost, struct ipr_resource_entry *res; struct ipr_ioarcb *ioarcb; struct ipr_cmnd *ipr_cmd; - unsigned long lock_flags; + unsigned long hrrq_flags, lock_flags; int rc; + struct ipr_hrr_queue *hrrq; + int hrrq_id; ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; - spin_lock_irqsave(shost->host_lock, lock_flags); scsi_cmd->result = (DID_OK << 16); res = scsi_cmd->device->hostdata; + if (ipr_is_gata(res) && res->sata_port) { + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return rc; + } + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* * We are currently blocking all devices due to a host reset * We have told the host to stop giving us new requests, but * ERP ops don't count. FIXME */ - if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return SCSI_MLQUEUE_HOST_BUSY; } @@ -5869,19 +6121,17 @@ static int ipr_queuecommand(struct Scsi_Host *shost, * FIXME - Create scsi_set_host_offline interface * and the ioa_is_dead check can be removed */ - if (unlikely(ioa_cfg->ioa_is_dead || !res)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead || !res)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); goto err_nodev; } - if (ipr_is_gata(res) && res->sata_port) { - rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); - spin_unlock_irqrestore(shost->host_lock, lock_flags); - return rc; + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return SCSI_MLQUEUE_HOST_BUSY; } - - ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); ipr_init_ipr_cmnd(ipr_cmd, ipr_scsi_done); ioarcb = &ipr_cmd->ioarcb; @@ -5902,26 +6152,27 @@ static int ipr_queuecommand(struct Scsi_Host *shost, } if (scsi_cmd->cmnd[0] >= 0xC0 && - (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) + (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) { ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + } if (ioa_cfg->sis64) rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd); else rc = ipr_build_ioadl(ioa_cfg, ipr_cmd); - spin_lock_irqsave(shost->host_lock, lock_flags); - if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); + if (unlikely(rc || (!hrrq->allow_cmds && !hrrq->ioa_is_dead))) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); if (!rc) scsi_dma_unmap(scsi_cmd); return SCSI_MLQUEUE_HOST_BUSY; } - if (unlikely(ioa_cfg->ioa_is_dead)) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); scsi_dma_unmap(scsi_cmd); goto err_nodev; } @@ -5931,18 +6182,18 @@ static int ipr_queuecommand(struct Scsi_Host *shost, ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE; res->needs_sync_complete = 0; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_pending_q); ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res)); ipr_send_command(ipr_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; err_nodev: - spin_lock_irqsave(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); scsi_cmd->result = (DID_NO_CONNECT << 16); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; } @@ -6040,7 +6291,7 @@ static void ipr_ata_phy_reset(struct ata_port *ap) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) goto out_unlock; rc = ipr_device_reset(ioa_cfg, res); @@ -6071,6 +6322,7 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) struct ipr_sata_port *sata_port = qc->ap->private_data; struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; unsigned long flags; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); @@ -6080,11 +6332,15 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->qc == qc) { - ipr_device_reset(ioa_cfg, sata_port->res); - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->qc == qc) { + ipr_device_reset(ioa_cfg, sata_port->res); + break; + } } + spin_unlock(&hrrq->_lock); } spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -6133,6 +6389,7 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) struct ipr_resource_entry *res = sata_port->res; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + spin_lock(&ipr_cmd->hrrq->_lock); if (ipr_cmd->ioa_cfg->sis64) memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, sizeof(struct ipr_ioasa_gata)); @@ -6148,7 +6405,8 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) qc->err_mask |= __ac_err_mask(sata_port->ioasa.status); else qc->err_mask |= ac_err_mask(sata_port->ioasa.status); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); ata_qc_complete(qc); } @@ -6244,6 +6502,48 @@ static void ipr_build_ata_ioadl(struct ipr_cmnd *ipr_cmd, } /** + * ipr_qc_defer - Get a free ipr_cmd + * @qc: queued command + * + * Return value: + * 0 if success + **/ +static int ipr_qc_defer(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ipr_sata_port *sata_port = ap->private_data; + struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; + struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; + int hrrq_id; + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + qc->lldd_task = NULL; + spin_lock(&hrrq->_lock); + if (unlikely(hrrq->ioa_is_dead)) { + spin_unlock(&hrrq->_lock); + return 0; + } + + if (unlikely(!hrrq->allow_cmds)) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + qc->lldd_task = ipr_cmd; + spin_unlock(&hrrq->_lock); + return 0; +} + +/** * ipr_qc_issue - Issue a SATA qc to a device * @qc: queued command * @@ -6260,10 +6560,23 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) struct ipr_ioarcb *ioarcb; struct ipr_ioarcb_ata_regs *regs; - if (unlikely(!ioa_cfg->allow_cmds || ioa_cfg->ioa_is_dead)) + if (qc->lldd_task == NULL) + ipr_qc_defer(qc); + + ipr_cmd = qc->lldd_task; + if (ipr_cmd == NULL) return AC_ERR_SYSTEM; - ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); + qc->lldd_task = NULL; + spin_lock(&ipr_cmd->hrrq->_lock); + if (unlikely(!ipr_cmd->hrrq->allow_cmds || + ipr_cmd->hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); + return AC_ERR_SYSTEM; + } + + ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); ioarcb = &ipr_cmd->ioarcb; if (ioa_cfg->sis64) { @@ -6275,7 +6588,7 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) memset(regs, 0, sizeof(*regs)); ioarcb->add_cmd_parms_len = cpu_to_be16(sizeof(*regs)); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->qc = qc; ipr_cmd->done = ipr_sata_done; ipr_cmd->ioarcb.res_handle = res->res_handle; @@ -6315,10 +6628,12 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) default: WARN_ON(1); + spin_unlock(&ipr_cmd->hrrq->_lock); return AC_ERR_INVALID; } ipr_send_command(ipr_cmd); + spin_unlock(&ipr_cmd->hrrq->_lock); return 0; } @@ -6357,6 +6672,7 @@ static struct ata_port_operations ipr_sata_ops = { .hardreset = ipr_sata_reset, .post_internal_cmd = ipr_ata_post_internal, .qc_prep = ata_noop_qc_prep, + .qc_defer = ipr_qc_defer, .qc_issue = ipr_qc_issue, .qc_fill_rtf = ipr_qc_fill_rtf, .port_start = ata_sas_port_start, @@ -6427,7 +6743,7 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd) ENTER; ioa_cfg->in_reset_reload = 0; ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock_irq(ioa_cfg->host->host_lock); @@ -6454,11 +6770,16 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_resource_entry *res; struct ipr_hostrcb *hostrcb, *temp; - int i = 0; + int i = 0, j; ENTER; ioa_cfg->in_reset_reload = 0; - ioa_cfg->allow_cmds = 1; + for (j = 0; j < ioa_cfg->hrrq_num; j++) { + spin_lock(&ioa_cfg->hrrq[j]._lock); + ioa_cfg->hrrq[j].allow_cmds = 1; + spin_unlock(&ioa_cfg->hrrq[j]._lock); + } + wmb(); ioa_cfg->reset_cmd = NULL; ioa_cfg->doorbell |= IPR_RUNTIME_RESET; @@ -6482,14 +6803,14 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) dev_info(&ioa_cfg->pdev->dev, "IOA initialized.\n"); ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock(ioa_cfg->host->host_lock); scsi_unblock_requests(ioa_cfg->host); spin_lock(ioa_cfg->host->host_lock); - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) scsi_block_requests(ioa_cfg->host); LEAVE; @@ -6560,9 +6881,11 @@ static int ipr_set_supported_devs(struct ipr_cmnd *ipr_cmd) if (!ioa_cfg->sis64) ipr_cmd->job_step = ipr_set_supported_devs; + LEAVE; return IPR_RC_JOB_RETURN; } + LEAVE; return IPR_RC_JOB_CONTINUE; } @@ -6820,7 +7143,7 @@ static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd) ipr_cmd->ioarcb.cmd_pkt.cdb[0], ioasc); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); return IPR_RC_JOB_RETURN; } @@ -7278,46 +7601,71 @@ static int ipr_ioafp_identify_hrrq(struct ipr_cmnd *ipr_cmd) { struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb; + struct ipr_hrr_queue *hrrq; ENTER; + ipr_cmd->job_step = ipr_ioafp_std_inquiry; dev_info(&ioa_cfg->pdev->dev, "Starting IOA initialization sequence.\n"); - ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; - ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); + if (ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) { + hrrq = &ioa_cfg->hrrq[ioa_cfg->identify_hrrq_index]; - ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; - if (ioa_cfg->sis64) - ioarcb->cmd_pkt.cdb[1] = 0x1; - ioarcb->cmd_pkt.cdb[2] = - ((u64) ioa_cfg->host_rrq_dma >> 24) & 0xff; - ioarcb->cmd_pkt.cdb[3] = - ((u64) ioa_cfg->host_rrq_dma >> 16) & 0xff; - ioarcb->cmd_pkt.cdb[4] = - ((u64) ioa_cfg->host_rrq_dma >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[5] = - ((u64) ioa_cfg->host_rrq_dma) & 0xff; - ioarcb->cmd_pkt.cdb[7] = - ((sizeof(u32) * IPR_NUM_CMD_BLKS) >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[8] = - (sizeof(u32) * IPR_NUM_CMD_BLKS) & 0xff; + ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; + ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); - if (ioa_cfg->sis64) { - ioarcb->cmd_pkt.cdb[10] = - ((u64) ioa_cfg->host_rrq_dma >> 56) & 0xff; - ioarcb->cmd_pkt.cdb[11] = - ((u64) ioa_cfg->host_rrq_dma >> 48) & 0xff; - ioarcb->cmd_pkt.cdb[12] = - ((u64) ioa_cfg->host_rrq_dma >> 40) & 0xff; - ioarcb->cmd_pkt.cdb[13] = - ((u64) ioa_cfg->host_rrq_dma >> 32) & 0xff; - } + ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + if (ioa_cfg->sis64) + ioarcb->cmd_pkt.cdb[1] = 0x1; - ipr_cmd->job_step = ipr_ioafp_std_inquiry; + if (ioa_cfg->nvectors == 1) + ioarcb->cmd_pkt.cdb[1] &= ~IPR_ID_HRRQ_SELE_ENABLE; + else + ioarcb->cmd_pkt.cdb[1] |= IPR_ID_HRRQ_SELE_ENABLE; + + ioarcb->cmd_pkt.cdb[2] = + ((u64) hrrq->host_rrq_dma >> 24) & 0xff; + ioarcb->cmd_pkt.cdb[3] = + ((u64) hrrq->host_rrq_dma >> 16) & 0xff; + ioarcb->cmd_pkt.cdb[4] = + ((u64) hrrq->host_rrq_dma >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[5] = + ((u64) hrrq->host_rrq_dma) & 0xff; + ioarcb->cmd_pkt.cdb[7] = + ((sizeof(u32) * hrrq->size) >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[8] = + (sizeof(u32) * hrrq->size) & 0xff; + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[9] = + ioa_cfg->identify_hrrq_index; - ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, IPR_INTERNAL_TIMEOUT); + if (ioa_cfg->sis64) { + ioarcb->cmd_pkt.cdb[10] = + ((u64) hrrq->host_rrq_dma >> 56) & 0xff; + ioarcb->cmd_pkt.cdb[11] = + ((u64) hrrq->host_rrq_dma >> 48) & 0xff; + ioarcb->cmd_pkt.cdb[12] = + ((u64) hrrq->host_rrq_dma >> 40) & 0xff; + ioarcb->cmd_pkt.cdb[13] = + ((u64) hrrq->host_rrq_dma >> 32) & 0xff; + } + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[14] = + ioa_cfg->identify_hrrq_index; + + ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, + IPR_INTERNAL_TIMEOUT); + + if (++ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) + ipr_cmd->job_step = ipr_ioafp_identify_hrrq; + + LEAVE; + return IPR_RC_JOB_RETURN; + } LEAVE; - return IPR_RC_JOB_RETURN; + return IPR_RC_JOB_CONTINUE; } /** @@ -7365,7 +7713,9 @@ static void ipr_reset_timer_done(struct ipr_cmnd *ipr_cmd) static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, unsigned long timeout) { - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + + ENTER; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; ipr_cmd->timer.data = (unsigned long) ipr_cmd; @@ -7383,13 +7733,26 @@ static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, **/ static void ipr_init_ioa_mem(struct ipr_ioa_cfg *ioa_cfg) { - memset(ioa_cfg->host_rrq, 0, sizeof(u32) * IPR_NUM_CMD_BLKS); + struct ipr_hrr_queue *hrrq; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + memset(hrrq->host_rrq, 0, sizeof(u32) * hrrq->size); + + /* Initialize Host RRQ pointers */ + hrrq->hrrq_start = hrrq->host_rrq; + hrrq->hrrq_end = &hrrq->host_rrq[hrrq->size - 1]; + hrrq->hrrq_curr = hrrq->hrrq_start; + hrrq->toggle_bit = 1; + spin_unlock(&hrrq->_lock); + } + wmb(); - /* Initialize Host RRQ pointers */ - ioa_cfg->hrrq_start = ioa_cfg->host_rrq; - ioa_cfg->hrrq_end = &ioa_cfg->host_rrq[IPR_NUM_CMD_BLKS - 1]; - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit = 1; + ioa_cfg->identify_hrrq_index = 0; + if (ioa_cfg->hrrq_num == 1) + atomic_set(&ioa_cfg->hrrq_index, 0); + else + atomic_set(&ioa_cfg->hrrq_index, 1); /* Zero out config table */ memset(ioa_cfg->u.cfg_table, 0, ioa_cfg->cfg_table_size); @@ -7446,7 +7809,8 @@ static int ipr_reset_next_stage(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); return IPR_RC_JOB_RETURN; } @@ -7466,12 +7830,18 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; volatile u32 int_reg; volatile u64 maskval; + int i; ENTER; ipr_cmd->job_step = ipr_ioafp_identify_hrrq; ipr_init_ioa_mem(ioa_cfg); - ioa_cfg->allow_interrupts = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->sis64) { /* Set the adapter to the correct endian mode. */ writel(IPR_ENDIAN_SWAP_KEY, ioa_cfg->regs.endian_swap_reg); @@ -7511,7 +7881,7 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); LEAVE; return IPR_RC_JOB_RETURN; @@ -8030,7 +8400,8 @@ static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd) int rc = IPR_RC_JOB_CONTINUE; ENTER; - if (shutdown_type != IPR_SHUTDOWN_NONE && !ioa_cfg->ioa_is_dead) { + if (shutdown_type != IPR_SHUTDOWN_NONE && + !ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD; ipr_cmd->ioarcb.cmd_pkt.cdb[0] = IPR_IOA_SHUTDOWN; @@ -8078,7 +8449,8 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd) * We are doing nested adapter resets and this is * not the current reset job. */ - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, + &ipr_cmd->hrrq->hrrq_free_q); return; } @@ -8113,9 +8485,15 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { struct ipr_cmnd *ipr_cmd; + int i; ioa_cfg->in_reset_reload = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); scsi_block_requests(ioa_cfg->host); ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); @@ -8141,7 +8519,9 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { - if (ioa_cfg->ioa_is_dead) + int i; + + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return; if (ioa_cfg->in_reset_reload) { @@ -8156,7 +8536,12 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, "IOA taken offline - error recovery failed\n"); ioa_cfg->reset_retries = 0; - ioa_cfg->ioa_is_dead = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->in_ioa_bringdown) { ioa_cfg->reset_cmd = NULL; @@ -8188,9 +8573,17 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, */ static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd) { + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + int i; + /* Disallow new interrupts, avoid loop */ - ipr_cmd->ioa_cfg->allow_interrupts = 0; - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; return IPR_RC_JOB_RETURN; } @@ -8247,13 +8640,19 @@ static void ipr_pci_perm_failure(struct pci_dev *pdev) { unsigned long flags = 0; struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); if (ioa_cfg->sdt_state == WAIT_FOR_DUMP) ioa_cfg->sdt_state = ABORT_DUMP; ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES; ioa_cfg->in_ioa_bringdown = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -8310,12 +8709,11 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg) } else _ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa, IPR_SHUTDOWN_NONE); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); - if (ioa_cfg->ioa_is_dead) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { rc = -EIO; } else if (ipr_invalid_adapter(ioa_cfg)) { if (!ipr_testmode) @@ -8376,8 +8774,13 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg) pci_free_consistent(ioa_cfg->pdev, sizeof(struct ipr_misc_cbs), ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); ipr_free_cmd_blks(ioa_cfg); - pci_free_consistent(ioa_cfg->pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + + for (i = 0; i < ioa_cfg->hrrq_num; i++) + pci_free_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + pci_free_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); @@ -8408,8 +8811,23 @@ static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg) struct pci_dev *pdev = ioa_cfg->pdev; ENTER; - free_irq(pdev->irq, ioa_cfg); - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { + int i; + for (i = 0; i < ioa_cfg->nvectors; i++) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + } else + free_irq(pdev->irq, &ioa_cfg->hrrq[0]); + + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + pci_disable_msi(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + pci_disable_msix(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + } + iounmap(ioa_cfg->hdw_dma_regs); pci_release_regions(pdev); ipr_free_mem(ioa_cfg); @@ -8430,7 +8848,7 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; dma_addr_t dma_addr; - int i; + int i, entries_each_hrrq, hrrq_id = 0; ioa_cfg->ipr_cmd_pool = pci_pool_create(IPR_NAME, ioa_cfg->pdev, sizeof(struct ipr_cmnd), 512, 0); @@ -8446,6 +8864,41 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) return -ENOMEM; } + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + if (ioa_cfg->hrrq_num > 1) { + if (i == 0) { + entries_each_hrrq = IPR_NUM_INTERNAL_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = + (entries_each_hrrq - 1); + } else { + entries_each_hrrq = + IPR_NUM_BASE_CMD_BLKS/ + (ioa_cfg->hrrq_num - 1); + ioa_cfg->hrrq[i].min_cmd_id = + IPR_NUM_INTERNAL_CMD_BLKS + + (i - 1) * entries_each_hrrq; + ioa_cfg->hrrq[i].max_cmd_id = + (IPR_NUM_INTERNAL_CMD_BLKS + + i * entries_each_hrrq - 1); + } + } else { + entries_each_hrrq = IPR_NUM_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = (entries_each_hrrq - 1); + } + ioa_cfg->hrrq[i].size = entries_each_hrrq; + } + + BUG_ON(ioa_cfg->hrrq_num == 0); + + i = IPR_NUM_CMD_BLKS - + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id - 1; + if (i > 0) { + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].size += i; + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id += i; + } + for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { ipr_cmd = pci_pool_alloc(ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); @@ -8484,7 +8937,11 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) ipr_cmd->sense_buffer_dma = dma_addr + offsetof(struct ipr_cmnd, sense_buffer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + ipr_cmd->ioarcb.cmd_pkt.hrrq_id = hrrq_id; + ipr_cmd->hrrq = &ioa_cfg->hrrq[hrrq_id]; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + if (i >= ioa_cfg->hrrq[hrrq_id].max_cmd_id) + hrrq_id++; } return 0; @@ -8516,6 +8973,10 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); ioa_cfg->vset_ids = kzalloc(sizeof(unsigned long) * BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); + + if (!ioa_cfg->target_ids || !ioa_cfg->array_ids + || !ioa_cfg->vset_ids) + goto out_free_res_entries; } for (i = 0; i < ioa_cfg->max_devs_supported; i++) { @@ -8530,15 +8991,34 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) if (!ioa_cfg->vpd_cbs) goto out_free_res_entries; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_free_q); + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_pending_q); + spin_lock_init(&ioa_cfg->hrrq[i]._lock); + if (i == 0) + ioa_cfg->hrrq[i].lock = ioa_cfg->host->host_lock; + else + ioa_cfg->hrrq[i].lock = &ioa_cfg->hrrq[i]._lock; + } + if (ipr_alloc_cmd_blks(ioa_cfg)) goto out_free_vpd_cbs; - ioa_cfg->host_rrq = pci_alloc_consistent(ioa_cfg->pdev, - sizeof(u32) * IPR_NUM_CMD_BLKS, - &ioa_cfg->host_rrq_dma); - - if (!ioa_cfg->host_rrq) - goto out_ipr_free_cmd_blocks; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + ioa_cfg->hrrq[i].host_rrq = pci_alloc_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + &ioa_cfg->hrrq[i].host_rrq_dma); + + if (!ioa_cfg->hrrq[i].host_rrq) { + while (--i > 0) + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + goto out_ipr_free_cmd_blocks; + } + ioa_cfg->hrrq[i].ioa_cfg = ioa_cfg; + } ioa_cfg->u.cfg_table = pci_alloc_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, @@ -8582,8 +9062,12 @@ out_free_hostrcb_dma: ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); out_free_host_rrq: - pci_free_consistent(pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + } out_ipr_free_cmd_blocks: ipr_free_cmd_blks(ioa_cfg); out_free_vpd_cbs: @@ -8591,6 +9075,9 @@ out_free_vpd_cbs: ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); out_free_res_entries: kfree(ioa_cfg->res_entries); + kfree(ioa_cfg->target_ids); + kfree(ioa_cfg->array_ids); + kfree(ioa_cfg->vset_ids); goto out; } @@ -8638,15 +9125,11 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->doorbell = IPR_DOORBELL; sprintf(ioa_cfg->eye_catcher, IPR_EYECATCHER); sprintf(ioa_cfg->trace_start, IPR_TRACE_START_LABEL); - sprintf(ioa_cfg->ipr_free_label, IPR_FREEQ_LABEL); - sprintf(ioa_cfg->ipr_pending_label, IPR_PENDQ_LABEL); sprintf(ioa_cfg->cfg_table_start, IPR_CFG_TBL_START); sprintf(ioa_cfg->resource_table_label, IPR_RES_TABLE_LABEL); sprintf(ioa_cfg->ipr_hcam_label, IPR_HCAM_LABEL); sprintf(ioa_cfg->ipr_cmd_label, IPR_CMD_LABEL); - INIT_LIST_HEAD(&ioa_cfg->free_q); - INIT_LIST_HEAD(&ioa_cfg->pending_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_free_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_pending_q); INIT_LIST_HEAD(&ioa_cfg->free_res_q); @@ -8724,6 +9207,88 @@ ipr_get_chip_info(const struct pci_device_id *dev_id) return NULL; } +static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg) +{ + struct msix_entry entries[IPR_MAX_MSIX_VECTORS]; + int i, err, vectors; + + for (i = 0; i < ARRAY_SIZE(entries); ++i) + entries[i].entry = i; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msix(ioa_cfg->pdev, entries, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msix(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = entries[i].vector; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static int ipr_enable_msi(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, err, vectors; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msi_block(ioa_cfg->pdev, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msi(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = ioa_cfg->pdev->irq + i; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static void name_msi_vectors(struct ipr_ioa_cfg *ioa_cfg) +{ + int vec_idx, n = sizeof(ioa_cfg->vectors_info[0].desc) - 1; + + for (vec_idx = 0; vec_idx < ioa_cfg->nvectors; vec_idx++) { + snprintf(ioa_cfg->vectors_info[vec_idx].desc, n, + "host%d-%d", ioa_cfg->host->host_no, vec_idx); + ioa_cfg->vectors_info[vec_idx]. + desc[strlen(ioa_cfg->vectors_info[vec_idx].desc)] = 0; + } +} + +static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, rc; + + for (i = 1; i < ioa_cfg->nvectors; i++) { + rc = request_irq(ioa_cfg->vectors_info[i].vec, + ipr_isr_mhrrq, + 0, + ioa_cfg->vectors_info[i].desc, + &ioa_cfg->hrrq[i]); + if (rc) { + while (--i >= 0) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + return rc; + } + } + return 0; +} + /** * ipr_test_intr - Handle the interrupt generated in ipr_test_msi(). * @pdev: PCI device struct @@ -8740,6 +9305,7 @@ static irqreturn_t ipr_test_intr(int irq, void *devp) unsigned long lock_flags = 0; irqreturn_t rc = IRQ_HANDLED; + dev_info(&ioa_cfg->pdev->dev, "Received IRQ : %d\n", irq); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ioa_cfg->msi_received = 1; @@ -8787,9 +9353,9 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) writel(IPR_PCII_IO_DEBUG_ACKNOWLEDGE, ioa_cfg->regs.sense_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); wait_event_timeout(ioa_cfg->msi_wait_q, ioa_cfg->msi_received, HZ); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->msi_received) { /* MSI test failed */ dev_info(&pdev->dev, "MSI test failed. Falling back to LSI.\n"); @@ -8806,8 +9372,7 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) return rc; } -/** - * ipr_probe_ioa - Allocates memory and does first stage of initialization + /* ipr_probe_ioa - Allocates memory and does first stage of initialization * @pdev: PCI device struct * @dev_id: PCI device id struct * @@ -8823,6 +9388,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, void __iomem *ipr_regs; int rc = PCIBIOS_SUCCESSFUL; volatile u32 mask, uproc, interrupts; + unsigned long lock_flags; ENTER; @@ -8918,17 +9484,56 @@ static int ipr_probe_ioa(struct pci_dev *pdev, goto cleanup_nomem; } - /* Enable MSI style interrupts if they are supported. */ - if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && !pci_enable_msi(pdev)) { + if (ipr_number_of_msix > IPR_MAX_MSIX_VECTORS) { + dev_err(&pdev->dev, "The max number of MSIX is %d\n", + IPR_MAX_MSIX_VECTORS); + ipr_number_of_msix = IPR_MAX_MSIX_VECTORS; + } + + if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msix(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSIX; + else if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msi(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSI; + else { + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + dev_info(&pdev->dev, "Cannot enable MSI.\n"); + } + + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { rc = ipr_test_msi(ioa_cfg, pdev); - if (rc == -EOPNOTSUPP) - pci_disable_msi(pdev); + if (rc == -EOPNOTSUPP) { + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + pci_disable_msi(pdev); + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + pci_disable_msix(pdev); + } + + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + } else if (rc) goto out_msi_disable; - else - dev_info(&pdev->dev, "MSI enabled with IRQ: %d\n", pdev->irq); - } else if (ipr_debug) - dev_info(&pdev->dev, "Cannot enable MSI.\n"); + else { + if (ioa_cfg->intr_flag == IPR_USE_MSI) + dev_info(&pdev->dev, + "Request for %d MSIs succeeded with starting IRQ: %d\n", + ioa_cfg->nvectors, pdev->irq); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + dev_info(&pdev->dev, + "Request for %d MSIXs succeeded.", + ioa_cfg->nvectors); + } + } + + ioa_cfg->hrrq_num = min3(ioa_cfg->nvectors, + (unsigned int)num_online_cpus(), + (unsigned int)IPR_MAX_HRRQ_NUM); /* Save away PCI config space for use following IOA reset */ rc = pci_save_state(pdev); @@ -8975,11 +9580,24 @@ static int ipr_probe_ioa(struct pci_dev *pdev, if (interrupts & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - rc = request_irq(pdev->irq, ipr_isr, - ioa_cfg->msi_received ? 0 : IRQF_SHARED, - IPR_NAME, ioa_cfg); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->intr_flag == IPR_USE_MSI + || ioa_cfg->intr_flag == IPR_USE_MSIX) { + name_msi_vectors(ioa_cfg); + rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_isr, + 0, + ioa_cfg->vectors_info[0].desc, + &ioa_cfg->hrrq[0]); + if (!rc) + rc = ipr_request_other_msi_irqs(ioa_cfg); + } else { + rc = request_irq(pdev->irq, ipr_isr, + IRQF_SHARED, + IPR_NAME, &ioa_cfg->hrrq[0]); + } if (rc) { dev_err(&pdev->dev, "Couldn't register IRQ %d! rc=%d\n", pdev->irq, rc); @@ -9004,7 +9622,10 @@ out: cleanup_nolog: ipr_free_mem(ioa_cfg); out_msi_disable: - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI) + pci_disable_msi(pdev); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + pci_disable_msix(pdev); cleanup_nomem: iounmap(ipr_regs); out_release_regions: @@ -9138,7 +9759,7 @@ static void ipr_remove(struct pci_dev *pdev) static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + int rc, i; rc = ipr_probe_ioa(pdev, dev_id); @@ -9185,6 +9806,17 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN); ioa_cfg->allow_ml_add_del = 1; ioa_cfg->host->max_channel = IPR_VSET_BUS; + ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + schedule_work(&ioa_cfg->work_q); return 0; } @@ -9203,8 +9835,16 @@ static void ipr_shutdown(struct pci_dev *pdev) { struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); unsigned long lock_flags = 0; + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + ioa_cfg->iopoll_weight = 0; + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); @@ -9277,6 +9917,8 @@ static struct pci_device_id ipr_pci_table[] = { { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57B2, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C0, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C3, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C4, 0, 0, 0 }, @@ -9290,6 +9932,14 @@ static struct pci_device_id ipr_pci_table[] = { PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C8, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57CE, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D5, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D6, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D7, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D8, 0, 0, 0 }, { } }; MODULE_DEVICE_TABLE(pci, ipr_pci_table); @@ -9316,9 +9966,7 @@ static struct pci_driver ipr_driver = { **/ static void ipr_halt_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -9340,7 +9988,7 @@ static int ipr_halt(struct notifier_block *nb, ulong event, void *buf) list_for_each_entry(ioa_cfg, &ipr_ioa_head, queue) { spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - if (!ioa_cfg->allow_cmds) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); continue; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index c8a137f..1a9a246 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -32,14 +32,15 @@ #include <linux/libata.h> #include <linux/list.h> #include <linux/kref.h> +#include <linux/blk-iopoll.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> /* * Literals */ -#define IPR_DRIVER_VERSION "2.5.4" -#define IPR_DRIVER_DATE "(July 11, 2012)" +#define IPR_DRIVER_VERSION "2.6.0" +#define IPR_DRIVER_DATE "(November 16, 2012)" /* * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding @@ -82,6 +83,7 @@ #define IPR_SUBS_DEV_ID_57B4 0x033B #define IPR_SUBS_DEV_ID_57B2 0x035F +#define IPR_SUBS_DEV_ID_57C0 0x0352 #define IPR_SUBS_DEV_ID_57C3 0x0353 #define IPR_SUBS_DEV_ID_57C4 0x0354 #define IPR_SUBS_DEV_ID_57C6 0x0357 @@ -94,6 +96,10 @@ #define IPR_SUBS_DEV_ID_574D 0x0356 #define IPR_SUBS_DEV_ID_57C8 0x035D +#define IPR_SUBS_DEV_ID_57D5 0x03FB +#define IPR_SUBS_DEV_ID_57D6 0x03FC +#define IPR_SUBS_DEV_ID_57D7 0x03FF +#define IPR_SUBS_DEV_ID_57D8 0x03FE #define IPR_NAME "ipr" /* @@ -298,6 +304,9 @@ IPR_PCII_NO_HOST_RRQ | IPR_PCII_IOARRIN_LOST | IPR_PCII_MMIO_ERROR) * Misc literals */ #define IPR_NUM_IOADL_ENTRIES IPR_MAX_SGLIST +#define IPR_MAX_MSIX_VECTORS 0x5 +#define IPR_MAX_HRRQ_NUM 0x10 +#define IPR_INIT_HRRQ 0x0 /* * Adapter interface types @@ -404,7 +413,7 @@ struct ipr_config_table_entry64 { __be64 dev_id; __be64 lun; __be64 lun_wwn[2]; -#define IPR_MAX_RES_PATH_LENGTH 24 +#define IPR_MAX_RES_PATH_LENGTH 48 __be64 res_path; struct ipr_std_inq_data std_inq_data; u8 reserved2[4]; @@ -459,9 +468,39 @@ struct ipr_supported_device { u8 reserved2[16]; }__attribute__((packed, aligned (4))); +struct ipr_hrr_queue { + struct ipr_ioa_cfg *ioa_cfg; + __be32 *host_rrq; + dma_addr_t host_rrq_dma; +#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc +#define IPR_HRRQ_RESP_BIT_SET 0x00000002 +#define IPR_HRRQ_TOGGLE_BIT 0x00000001 +#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 +#define IPR_ID_HRRQ_SELE_ENABLE 0x02 + volatile __be32 *hrrq_start; + volatile __be32 *hrrq_end; + volatile __be32 *hrrq_curr; + + struct list_head hrrq_free_q; + struct list_head hrrq_pending_q; + spinlock_t _lock; + spinlock_t *lock; + + volatile u32 toggle_bit; + u32 size; + u32 min_cmd_id; + u32 max_cmd_id; + u8 allow_interrupts:1; + u8 ioa_is_dead:1; + u8 allow_cmds:1; + + struct blk_iopoll iopoll; +}; + /* Command packet structure */ struct ipr_cmd_pkt { - __be16 reserved; /* Reserved by IOA */ + u8 reserved; /* Reserved by IOA */ + u8 hrrq_id; u8 request_type; #define IPR_RQTYPE_SCSICDB 0x00 #define IPR_RQTYPE_IOACMD 0x01 @@ -1022,6 +1061,10 @@ struct ipr_hostrcb64_fabric_desc { struct ipr_hostrcb64_config_element elem[1]; }__attribute__((packed, aligned (8))); +#define for_each_hrrq(hrrq, ioa_cfg) \ + for (hrrq = (ioa_cfg)->hrrq; \ + hrrq < ((ioa_cfg)->hrrq + (ioa_cfg)->hrrq_num); hrrq++) + #define for_each_fabric_cfg(fabric, cfg) \ for (cfg = (fabric)->elem; \ cfg < ((fabric)->elem + be16_to_cpu((fabric)->num_entries)); \ @@ -1308,6 +1351,7 @@ struct ipr_chip_cfg_t { u16 max_cmds; u8 cache_line_size; u8 clear_isr; + u32 iopoll_weight; struct ipr_interrupt_offsets regs; }; @@ -1317,6 +1361,7 @@ struct ipr_chip_t { u16 intr_type; #define IPR_USE_LSI 0x00 #define IPR_USE_MSI 0x01 +#define IPR_USE_MSIX 0x02 u16 sis_type; #define IPR_SIS32 0x00 #define IPR_SIS64 0x01 @@ -1375,13 +1420,10 @@ struct ipr_ioa_cfg { struct list_head queue; - u8 allow_interrupts:1; u8 in_reset_reload:1; u8 in_ioa_bringdown:1; u8 ioa_unit_checked:1; - u8 ioa_is_dead:1; u8 dump_taken:1; - u8 allow_cmds:1; u8 allow_ml_add_del:1; u8 needs_hard_reset:1; u8 dual_raid:1; @@ -1413,21 +1455,7 @@ struct ipr_ioa_cfg { char trace_start[8]; #define IPR_TRACE_START_LABEL "trace" struct ipr_trace_entry *trace; - u32 trace_index:IPR_NUM_TRACE_INDEX_BITS; - - /* - * Queue for free command blocks - */ - char ipr_free_label[8]; -#define IPR_FREEQ_LABEL "free-q" - struct list_head free_q; - - /* - * Queue for command blocks outstanding to the adapter - */ - char ipr_pending_label[8]; -#define IPR_PENDQ_LABEL "pend-q" - struct list_head pending_q; + atomic_t trace_index; char cfg_table_start[8]; #define IPR_CFG_TBL_START "cfg" @@ -1452,16 +1480,10 @@ struct ipr_ioa_cfg { struct list_head hostrcb_free_q; struct list_head hostrcb_pending_q; - __be32 *host_rrq; - dma_addr_t host_rrq_dma; -#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc -#define IPR_HRRQ_RESP_BIT_SET 0x00000002 -#define IPR_HRRQ_TOGGLE_BIT 0x00000001 -#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 - volatile __be32 *hrrq_start; - volatile __be32 *hrrq_end; - volatile __be32 *hrrq_curr; - volatile u32 toggle_bit; + struct ipr_hrr_queue hrrq[IPR_MAX_HRRQ_NUM]; + u32 hrrq_num; + atomic_t hrrq_index; + u16 identify_hrrq_index; struct ipr_bus_attributes bus_attr[IPR_MAX_NUM_BUSES]; @@ -1507,6 +1529,17 @@ struct ipr_ioa_cfg { u32 max_cmds; struct ipr_cmnd **ipr_cmnd_list; dma_addr_t *ipr_cmnd_list_dma; + + u16 intr_flag; + unsigned int nvectors; + + struct { + unsigned short vec; + char desc[22]; + } vectors_info[IPR_MAX_MSIX_VECTORS]; + + u32 iopoll_weight; + }; /* struct ipr_ioa_cfg */ struct ipr_cmnd { @@ -1544,6 +1577,7 @@ struct ipr_cmnd { struct scsi_device *sdev; } u; + struct ipr_hrr_queue *hrrq; struct ipr_ioa_cfg *ioa_cfg; }; @@ -1717,7 +1751,8 @@ struct ipr_ucode_image_header { if (ipr_is_device(hostrcb)) { \ if ((hostrcb)->ioa_cfg->sis64) { \ printk(KERN_ERR IPR_NAME ": %s: " fmt, \ - ipr_format_res_path(hostrcb->hcam.u.error64.fd_res_path, \ + ipr_format_res_path(hostrcb->ioa_cfg, \ + hostrcb->hcam.u.error64.fd_res_path, \ hostrcb->rp_buffer, \ sizeof(hostrcb->rp_buffer)), \ __VA_ARGS__); \ diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index df4c13a..7706c99 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -466,11 +466,13 @@ enum intr_type_t { MSIX, }; +#define LPFC_CT_CTX_MAX 64 struct unsol_rcv_ct_ctx { uint32_t ctxt_id; uint32_t SID; - uint32_t flags; -#define UNSOL_VALID 0x00000001 + uint32_t valid; +#define UNSOL_INVALID 0 +#define UNSOL_VALID 1 uint16_t oxid; uint16_t rxid; }; @@ -750,6 +752,15 @@ struct lpfc_hba { void __iomem *ctrl_regs_memmap_p;/* Kernel memory mapped address for PCI BAR2 */ + void __iomem *pci_bar0_memmap_p; /* Kernel memory mapped address for + PCI BAR0 with dual-ULP support */ + void __iomem *pci_bar2_memmap_p; /* Kernel memory mapped address for + PCI BAR2 with dual-ULP support */ + void __iomem *pci_bar4_memmap_p; /* Kernel memory mapped address for + PCI BAR4 with dual-ULP support */ +#define PCI_64BIT_BAR0 0 +#define PCI_64BIT_BAR2 2 +#define PCI_64BIT_BAR4 4 void __iomem *MBslimaddr; /* virtual address for mbox cmds */ void __iomem *HAregaddr; /* virtual address for host attn reg */ void __iomem *CAregaddr; /* virtual address for chip attn reg */ @@ -938,7 +949,7 @@ struct lpfc_hba { spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */ struct list_head ct_ev_waiters; - struct unsol_rcv_ct_ctx ct_ctx[64]; + struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX]; uint32_t ctx_idx; uint8_t menlo_flag; /* menlo generic flags */ diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index f7368eb..32d5683 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -955,9 +955,9 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, spin_lock_irqsave(&phba->ct_ev_lock, flags); if (phba->sli_rev == LPFC_SLI_REV4) { evt_dat->immed_dat = phba->ctx_idx; - phba->ctx_idx = (phba->ctx_idx + 1) % 64; + phba->ctx_idx = (phba->ctx_idx + 1) % LPFC_CT_CTX_MAX; /* Provide warning for over-run of the ct_ctx array */ - if (phba->ct_ctx[evt_dat->immed_dat].flags & + if (phba->ct_ctx[evt_dat->immed_dat].valid == UNSOL_VALID) lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, "2717 CT context array entry " @@ -973,7 +973,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, piocbq->iocb.unsli3.rcvsli3.ox_id; phba->ct_ctx[evt_dat->immed_dat].SID = piocbq->iocb.un.rcvels.remoteID; - phba->ct_ctx[evt_dat->immed_dat].flags = UNSOL_VALID; + phba->ct_ctx[evt_dat->immed_dat].valid = UNSOL_VALID; } else evt_dat->immed_dat = piocbq->iocb.ulpContext; @@ -1013,6 +1013,47 @@ error_ct_unsol_exit: } /** + * lpfc_bsg_ct_unsol_abort - handler ct abort to management plane + * @phba: Pointer to HBA context object. + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function handles abort to the CT command toward management plane + * for SLI4 port. + * + * If the pending context of a CT command to management plane present, clears + * such context and returns 1 for handled; otherwise, it returns 0 indicating + * no context exists. + **/ +int +lpfc_bsg_ct_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) +{ + struct fc_frame_header fc_hdr; + struct fc_frame_header *fc_hdr_ptr = &fc_hdr; + int ctx_idx, handled = 0; + uint16_t oxid, rxid; + uint32_t sid; + + memcpy(fc_hdr_ptr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); + sid = sli4_sid_from_fc_hdr(fc_hdr_ptr); + oxid = be16_to_cpu(fc_hdr_ptr->fh_ox_id); + rxid = be16_to_cpu(fc_hdr_ptr->fh_rx_id); + + for (ctx_idx = 0; ctx_idx < LPFC_CT_CTX_MAX; ctx_idx++) { + if (phba->ct_ctx[ctx_idx].valid != UNSOL_VALID) + continue; + if (phba->ct_ctx[ctx_idx].rxid != rxid) + continue; + if (phba->ct_ctx[ctx_idx].oxid != oxid) + continue; + if (phba->ct_ctx[ctx_idx].SID != sid) + continue; + phba->ct_ctx[ctx_idx].valid = UNSOL_INVALID; + handled = 1; + } + return handled; +} + +/** * lpfc_bsg_hba_set_event - process a SET_EVENT bsg vendor command * @job: SET_EVENT fc_bsg_job **/ @@ -1318,7 +1359,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, icmd->ulpClass = CLASS3; if (phba->sli_rev == LPFC_SLI_REV4) { /* Do not issue unsol response if oxid not marked as valid */ - if (!(phba->ct_ctx[tag].flags & UNSOL_VALID)) { + if (phba->ct_ctx[tag].valid != UNSOL_VALID) { rc = IOCB_ERROR; goto issue_ct_rsp_exit; } @@ -1352,7 +1393,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; /* The exchange is done, mark the entry as invalid */ - phba->ct_ctx[tag].flags &= ~UNSOL_VALID; + phba->ct_ctx[tag].valid = UNSOL_INVALID; } else icmd->ulpContext = (ushort) tag; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 69d66e3..76ca65d 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -164,8 +164,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); -void lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, - struct lpfc_iocbq *); +int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int); void lpfc_fdmi_tmo(unsigned long); @@ -427,6 +426,7 @@ int lpfc_bsg_request(struct fc_bsg_job *); int lpfc_bsg_timeout(struct fc_bsg_job *); int lpfc_bsg_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); +int lpfc_bsg_ct_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); void __lpfc_sli_ringtx_put(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); struct lpfc_iocbq *lpfc_sli_ringtx_get(struct lpfc_hba *, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 65f9fb6..7bff3a1 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -164,37 +164,24 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, } /** - * lpfc_sli4_ct_abort_unsol_event - Default handle for sli4 unsol abort + * lpfc_ct_handle_unsol_abort - ct upper level protocol abort handler * @phba: Pointer to HBA context object. - * @pring: Pointer to the driver internal I/O ring. - * @piocbq: Pointer to the IOCBQ. + * @dmabuf: pointer to a dmabuf that describes the FC sequence * - * This function serves as the default handler for the sli4 unsolicited - * abort event. It shall be invoked when there is no application interface - * registered unsolicited abort handler. This handler does nothing but - * just simply releases the dma buffer used by the unsol abort event. + * This function serves as the upper level protocol abort handler for CT + * protocol. + * + * Return 1 if abort has been handled, 0 otherwise. **/ -void -lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *phba, - struct lpfc_sli_ring *pring, - struct lpfc_iocbq *piocbq) +int +lpfc_ct_handle_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) { - IOCB_t *icmd = &piocbq->iocb; - struct lpfc_dmabuf *bdeBuf; - uint32_t size; + int handled; - /* Forward abort event to any process registered to receive ct event */ - if (lpfc_bsg_ct_unsol_event(phba, pring, piocbq) == 0) - return; + /* CT upper level goes through BSG */ + handled = lpfc_bsg_ct_unsol_abort(phba, dmabuf); - /* If there is no BDE associated with IOCB, there is nothing to do */ - if (icmd->ulpBdeCount == 0) - return; - bdeBuf = piocbq->context2; - piocbq->context2 = NULL; - size = icmd->un.cont64[0].tus.f.bdeSize; - lpfc_ct_unsol_buffer(phba, piocbq, bdeBuf, size); - lpfc_in_buf_free(phba, bdeBuf); + return handled; } static void diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index b9440de..08d156a 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3122,6 +3122,13 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, case IOERR_SEQUENCE_TIMEOUT: case IOERR_INVALID_RPI: + if (cmd == ELS_CMD_PLOGI && + did == NameServer_DID) { + /* Continue forever if plogi to */ + /* the nameserver fails */ + maxretry = 0; + delay = 100; + } retry = 1; break; } @@ -6517,7 +6524,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_nodelist *ndlp; struct ls_rjt stat; uint32_t *payload; - uint32_t cmd, did, newnode, rjt_err = 0; + uint32_t cmd, did, newnode; + uint8_t rjt_exp, rjt_err = 0; IOCB_t *icmd = &elsiocb->iocb; if (!vport || !(elsiocb->context2)) @@ -6606,12 +6614,14 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* If Nport discovery is delayed, reject PLOGIs */ if (vport->fc_flag & FC_DISC_DELAYED) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } if (vport->port_state < LPFC_DISC_AUTH) { if (!(phba->pport->fc_flag & FC_PT2PT) || (phba->pport->fc_flag & FC_PT2PT_PLOGI)) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } /* We get here, and drop thru, if we are PT2PT with @@ -6648,6 +6658,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO); @@ -6661,6 +6672,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO); @@ -6680,6 +6692,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvADISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6693,6 +6706,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPDISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6730,6 +6744,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPRLI++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI); @@ -6813,6 +6828,11 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (newnode) lpfc_nlp_put(ndlp); break; + case ELS_CMD_REC: + /* receive this due to exchange closed */ + rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_INVALID_OX_RX; + break; default: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, "RCV ELS cmd: cmd:x%x did:x%x/ste:x%x", @@ -6820,6 +6840,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* Unsupported ELS command, reject */ rjt_err = LSRJT_CMD_UNSUPPORTED; + rjt_exp = LSEXP_NOTHING_MORE; /* Unknown ELS command <elsCmd> received from NPORT <did> */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, @@ -6834,7 +6855,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (rjt_err) { memset(&stat, 0, sizeof(stat)); stat.un.b.lsRjtRsnCode = rjt_err; - stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; + stat.un.b.lsRjtRsnCodeExp = rjt_exp; lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp, NULL); } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 7398ca8..e8c4760 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -538,6 +538,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10000000 #define ELS_CMD_TEST 0x11000000 #define ELS_CMD_RRQ 0x12000000 +#define ELS_CMD_REC 0x13000000 #define ELS_CMD_PRLI 0x20100014 #define ELS_CMD_PRLO 0x21100014 #define ELS_CMD_PRLO_ACC 0x02100014 @@ -574,6 +575,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10 #define ELS_CMD_TEST 0x11 #define ELS_CMD_RRQ 0x12 +#define ELS_CMD_REC 0x13 #define ELS_CMD_PRLI 0x14001020 #define ELS_CMD_PRLO 0x14001021 #define ELS_CMD_PRLO_ACC 0x14001002 diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index a47cfbd..6e93b88 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -106,6 +106,7 @@ struct lpfc_sli_intf { #define LPFC_SLI4_MB_WORD_COUNT 64 #define LPFC_MAX_MQ_PAGE 8 +#define LPFC_MAX_WQ_PAGE_V0 4 #define LPFC_MAX_WQ_PAGE 8 #define LPFC_MAX_CQ_PAGE 4 #define LPFC_MAX_EQ_PAGE 8 @@ -703,24 +704,41 @@ struct lpfc_register { * BAR0. The offsets are the same so the driver must account for * any base address difference. */ -#define LPFC_RQ_DOORBELL 0x00A0 -#define lpfc_rq_doorbell_num_posted_SHIFT 16 -#define lpfc_rq_doorbell_num_posted_MASK 0x3FFF -#define lpfc_rq_doorbell_num_posted_WORD word0 -#define lpfc_rq_doorbell_id_SHIFT 0 -#define lpfc_rq_doorbell_id_MASK 0xFFFF -#define lpfc_rq_doorbell_id_WORD word0 - -#define LPFC_WQ_DOORBELL 0x0040 -#define lpfc_wq_doorbell_num_posted_SHIFT 24 -#define lpfc_wq_doorbell_num_posted_MASK 0x00FF -#define lpfc_wq_doorbell_num_posted_WORD word0 -#define lpfc_wq_doorbell_index_SHIFT 16 -#define lpfc_wq_doorbell_index_MASK 0x00FF -#define lpfc_wq_doorbell_index_WORD word0 -#define lpfc_wq_doorbell_id_SHIFT 0 -#define lpfc_wq_doorbell_id_MASK 0xFFFF -#define lpfc_wq_doorbell_id_WORD word0 +#define LPFC_ULP0_RQ_DOORBELL 0x00A0 +#define LPFC_ULP1_RQ_DOORBELL 0x00C0 +#define lpfc_rq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_rq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_rq_db_list_fm_num_posted_WORD word0 +#define lpfc_rq_db_list_fm_index_SHIFT 16 +#define lpfc_rq_db_list_fm_index_MASK 0x00FF +#define lpfc_rq_db_list_fm_index_WORD word0 +#define lpfc_rq_db_list_fm_id_SHIFT 0 +#define lpfc_rq_db_list_fm_id_MASK 0xFFFF +#define lpfc_rq_db_list_fm_id_WORD word0 +#define lpfc_rq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_rq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_rq_db_ring_fm_num_posted_WORD word0 +#define lpfc_rq_db_ring_fm_id_SHIFT 0 +#define lpfc_rq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_rq_db_ring_fm_id_WORD word0 + +#define LPFC_ULP0_WQ_DOORBELL 0x0040 +#define LPFC_ULP1_WQ_DOORBELL 0x0060 +#define lpfc_wq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_wq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_wq_db_list_fm_num_posted_WORD word0 +#define lpfc_wq_db_list_fm_index_SHIFT 16 +#define lpfc_wq_db_list_fm_index_MASK 0x00FF +#define lpfc_wq_db_list_fm_index_WORD word0 +#define lpfc_wq_db_list_fm_id_SHIFT 0 +#define lpfc_wq_db_list_fm_id_MASK 0xFFFF +#define lpfc_wq_db_list_fm_id_WORD word0 +#define lpfc_wq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_wq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_wq_db_ring_fm_num_posted_WORD word0 +#define lpfc_wq_db_ring_fm_id_SHIFT 0 +#define lpfc_wq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_wq_db_ring_fm_id_WORD word0 #define LPFC_EQCQ_DOORBELL 0x0120 #define lpfc_eqcq_doorbell_se_SHIFT 31 @@ -1131,12 +1149,22 @@ struct lpfc_mbx_wq_create { struct { /* Version 0 Request */ uint32_t word0; #define lpfc_mbx_wq_create_num_pages_SHIFT 0 -#define lpfc_mbx_wq_create_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_num_pages_MASK 0x000000FF #define lpfc_mbx_wq_create_num_pages_WORD word0 +#define lpfc_mbx_wq_create_dua_SHIFT 8 +#define lpfc_mbx_wq_create_dua_MASK 0x00000001 +#define lpfc_mbx_wq_create_dua_WORD word0 #define lpfc_mbx_wq_create_cq_id_SHIFT 16 #define lpfc_mbx_wq_create_cq_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_cq_id_WORD word0 - struct dma_address page[LPFC_MAX_WQ_PAGE]; + struct dma_address page[LPFC_MAX_WQ_PAGE_V0]; + uint32_t word9; +#define lpfc_mbx_wq_create_bua_SHIFT 0 +#define lpfc_mbx_wq_create_bua_MASK 0x00000001 +#define lpfc_mbx_wq_create_bua_WORD word9 +#define lpfc_mbx_wq_create_ulp_num_SHIFT 8 +#define lpfc_mbx_wq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_wq_create_ulp_num_WORD word9 } request; struct { /* Version 1 Request */ uint32_t word0; /* Word 0 is the same as in v0 */ @@ -1160,6 +1188,17 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_q_id_SHIFT 0 #define lpfc_mbx_wq_create_q_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_wq_create_bar_set_SHIFT 0 +#define lpfc_mbx_wq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_bar_set_WORD word2 +#define WQ_PCI_BAR_0_AND_1 0x00 +#define WQ_PCI_BAR_2_AND_3 0x01 +#define WQ_PCI_BAR_4_AND_5 0x02 +#define lpfc_mbx_wq_create_db_format_SHIFT 16 +#define lpfc_mbx_wq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_db_format_WORD word2 } response; } u; }; @@ -1223,14 +1262,31 @@ struct lpfc_mbx_rq_create { #define lpfc_mbx_rq_create_num_pages_SHIFT 0 #define lpfc_mbx_rq_create_num_pages_MASK 0x0000FFFF #define lpfc_mbx_rq_create_num_pages_WORD word0 +#define lpfc_mbx_rq_create_dua_SHIFT 16 +#define lpfc_mbx_rq_create_dua_MASK 0x00000001 +#define lpfc_mbx_rq_create_dua_WORD word0 +#define lpfc_mbx_rq_create_bqu_SHIFT 17 +#define lpfc_mbx_rq_create_bqu_MASK 0x00000001 +#define lpfc_mbx_rq_create_bqu_WORD word0 +#define lpfc_mbx_rq_create_ulp_num_SHIFT 24 +#define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_rq_create_ulp_num_WORD word0 struct rq_context context; struct dma_address page[LPFC_MAX_WQ_PAGE]; } request; struct { uint32_t word0; -#define lpfc_mbx_rq_create_q_id_SHIFT 0 -#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF -#define lpfc_mbx_rq_create_q_id_WORD word0 +#define lpfc_mbx_rq_create_q_id_SHIFT 0 +#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_rq_create_bar_set_SHIFT 0 +#define lpfc_mbx_rq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_bar_set_WORD word2 +#define lpfc_mbx_rq_create_db_format_SHIFT 16 +#define lpfc_mbx_rq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_db_format_WORD word2 } response; } u; }; @@ -1388,6 +1444,33 @@ struct lpfc_mbx_get_rsrc_extent_info { } u; }; +struct lpfc_mbx_query_fw_config { + struct mbox_header header; + struct { + uint32_t config_number; +#define LPFC_FC_FCOE 0x00000007 + uint32_t asic_revision; + uint32_t physical_port; + uint32_t function_mode; +#define LPFC_FCOE_INI_MODE 0x00000040 +#define LPFC_FCOE_TGT_MODE 0x00000080 +#define LPFC_DUA_MODE 0x00000800 + uint32_t ulp0_mode; +#define LPFC_ULP_FCOE_INIT_MODE 0x00000040 +#define LPFC_ULP_FCOE_TGT_MODE 0x00000080 + uint32_t ulp0_nap_words[12]; + uint32_t ulp1_mode; + uint32_t ulp1_nap_words[12]; + uint32_t function_capabilities; + uint32_t cqid_base; + uint32_t cqid_tot; + uint32_t eqid_base; + uint32_t eqid_tot; + uint32_t ulp0_nap2_words[2]; + uint32_t ulp1_nap2_words[2]; + } rsp; +}; + struct lpfc_id_range { uint32_t word5; #define lpfc_mbx_rsrc_id_word4_0_SHIFT 0 @@ -1803,51 +1886,6 @@ struct lpfc_mbx_redisc_fcf_tbl { #define lpfc_mbx_redisc_fcf_index_WORD word12 }; -struct lpfc_mbx_query_fw_cfg { - struct mbox_header header; - uint32_t config_number; - uint32_t asic_rev; - uint32_t phys_port; - uint32_t function_mode; -/* firmware Function Mode */ -#define lpfc_function_mode_toe_SHIFT 0 -#define lpfc_function_mode_toe_MASK 0x00000001 -#define lpfc_function_mode_toe_WORD function_mode -#define lpfc_function_mode_nic_SHIFT 1 -#define lpfc_function_mode_nic_MASK 0x00000001 -#define lpfc_function_mode_nic_WORD function_mode -#define lpfc_function_mode_rdma_SHIFT 2 -#define lpfc_function_mode_rdma_MASK 0x00000001 -#define lpfc_function_mode_rdma_WORD function_mode -#define lpfc_function_mode_vm_SHIFT 3 -#define lpfc_function_mode_vm_MASK 0x00000001 -#define lpfc_function_mode_vm_WORD function_mode -#define lpfc_function_mode_iscsi_i_SHIFT 4 -#define lpfc_function_mode_iscsi_i_MASK 0x00000001 -#define lpfc_function_mode_iscsi_i_WORD function_mode -#define lpfc_function_mode_iscsi_t_SHIFT 5 -#define lpfc_function_mode_iscsi_t_MASK 0x00000001 -#define lpfc_function_mode_iscsi_t_WORD function_mode -#define lpfc_function_mode_fcoe_i_SHIFT 6 -#define lpfc_function_mode_fcoe_i_MASK 0x00000001 -#define lpfc_function_mode_fcoe_i_WORD function_mode -#define lpfc_function_mode_fcoe_t_SHIFT 7 -#define lpfc_function_mode_fcoe_t_MASK 0x00000001 -#define lpfc_function_mode_fcoe_t_WORD function_mode -#define lpfc_function_mode_dal_SHIFT 8 -#define lpfc_function_mode_dal_MASK 0x00000001 -#define lpfc_function_mode_dal_WORD function_mode -#define lpfc_function_mode_lro_SHIFT 9 -#define lpfc_function_mode_lro_MASK 0x00000001 -#define lpfc_function_mode_lro_WORD function_mode -#define lpfc_function_mode_flex10_SHIFT 10 -#define lpfc_function_mode_flex10_MASK 0x00000001 -#define lpfc_function_mode_flex10_WORD function_mode -#define lpfc_function_mode_ncsi_SHIFT 11 -#define lpfc_function_mode_ncsi_MASK 0x00000001 -#define lpfc_function_mode_ncsi_WORD function_mode -}; - /* Status field for embedded SLI_CONFIG mailbox command */ #define STATUS_SUCCESS 0x0 #define STATUS_FAILED 0x1 @@ -2965,7 +3003,7 @@ struct lpfc_mqe { struct lpfc_mbx_read_config rd_config; struct lpfc_mbx_request_features req_ftrs; struct lpfc_mbx_post_hdr_tmpl hdr_tmpl; - struct lpfc_mbx_query_fw_cfg query_fw_cfg; + struct lpfc_mbx_query_fw_config query_fw_cfg; struct lpfc_mbx_supp_pages supp_pages; struct lpfc_mbx_pc_sli4_params sli4_params; struct lpfc_mbx_get_sli4_parameters get_sli4_parameters; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 89ad558..314b4f6 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3165,14 +3165,10 @@ destroy_port(struct lpfc_vport *vport) int lpfc_get_instance(void) { - int instance = 0; + int ret; - /* Assign an unused number */ - if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL)) - return -1; - if (idr_get_new(&lpfc_hba_index, NULL, &instance)) - return -1; - return instance; + ret = idr_alloc(&lpfc_hba_index, NULL, 0, 0, GFP_KERNEL); + return ret < 0 ? -1 : ret; } /** @@ -6233,9 +6229,11 @@ lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba, uint32_t if_type) phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_SEM_OFFSET; phba->sli4_hba.RQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_RQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_RQ_DOORBELL; phba->sli4_hba.WQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_WQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_WQ_DOORBELL; phba->sli4_hba.EQCQDBregaddr = phba->sli4_hba.conf_regs_memmap_p + LPFC_EQCQ_DOORBELL; phba->sli4_hba.MQDBregaddr = @@ -6289,9 +6287,11 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf) return -ENODEV; phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_RQ_DOORBELL); phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_WQ_DOORBELL); phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL); phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + @@ -6987,6 +6987,19 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) phba->sli4_hba.fcp_wq = NULL; } + if (phba->pci_bar0_memmap_p) { + iounmap(phba->pci_bar0_memmap_p); + phba->pci_bar0_memmap_p = NULL; + } + if (phba->pci_bar2_memmap_p) { + iounmap(phba->pci_bar2_memmap_p); + phba->pci_bar2_memmap_p = NULL; + } + if (phba->pci_bar4_memmap_p) { + iounmap(phba->pci_bar4_memmap_p); + phba->pci_bar4_memmap_p = NULL; + } + /* Release FCP CQ mapping array */ if (phba->sli4_hba.fcp_cq_map != NULL) { kfree(phba->sli4_hba.fcp_cq_map); @@ -7050,6 +7063,53 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) int rc = -ENOMEM; int fcp_eqidx, fcp_cqidx, fcp_wqidx; int fcp_cq_index = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + LPFC_MBOXQ_t *mboxq; + uint32_t length; + + /* Check for dual-ULP support */ + mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mboxq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3249 Unable to allocate memory for " + "QUERY_FW_CFG mailbox command\n"); + return -ENOMEM; + } + length = (sizeof(struct lpfc_mbx_query_fw_config) - + sizeof(struct lpfc_sli4_cfg_mhdr)); + lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, + LPFC_MBOX_OPCODE_QUERY_FW_CFG, + length, LPFC_SLI4_MBX_EMBED); + + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + + shdr = (union lpfc_sli4_cfg_shdr *) + &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3250 QUERY_FW_CFG mailbox failed with status " + "x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); + rc = -ENXIO; + goto out_error; + } + + phba->sli4_hba.fw_func_mode = + mboxq->u.mqe.un.query_fw_cfg.rsp.function_mode; + phba->sli4_hba.ulp0_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp0_mode; + phba->sli4_hba.ulp1_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp1_mode; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3251 QUERY_FW_CFG: func_mode:x%x, ulp0_mode:x%x, " + "ulp1_mode:x%x\n", phba->sli4_hba.fw_func_mode, + phba->sli4_hba.ulp0_mode, phba->sli4_hba.ulp1_mode); + + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); /* * Set up HBA Event Queues (EQs) @@ -7664,78 +7724,6 @@ out: } /** - * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands - * @phba: pointer to lpfc hba data structure. - * @cnt: number of nop mailbox commands to send. - * - * This routine is invoked to send a number @cnt of NOP mailbox command and - * wait for each command to complete. - * - * Return: the number of NOP mailbox command completed. - **/ -static int -lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt) -{ - LPFC_MBOXQ_t *mboxq; - int length, cmdsent; - uint32_t mbox_tmo; - uint32_t rc = 0; - uint32_t shdr_status, shdr_add_status; - union lpfc_sli4_cfg_shdr *shdr; - - if (cnt == 0) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2518 Requested to send 0 NOP mailbox cmd\n"); - return cnt; - } - - mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mboxq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2519 Unable to allocate memory for issuing " - "NOP mailbox command\n"); - return 0; - } - - /* Set up NOP SLI4_CONFIG mailbox-ioctl command */ - length = (sizeof(struct lpfc_mbx_nop) - - sizeof(struct lpfc_sli4_cfg_mhdr)); - - for (cmdsent = 0; cmdsent < cnt; cmdsent++) { - lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, - LPFC_MBOX_OPCODE_NOP, length, - LPFC_SLI4_MBX_EMBED); - if (!phba->sli4_hba.intr_enable) - rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - else { - mbox_tmo = lpfc_mbox_tmo_val(phba, mboxq); - rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo); - } - if (rc == MBX_TIMEOUT) - break; - /* Check return status */ - shdr = (union lpfc_sli4_cfg_shdr *) - &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; - shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); - shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, - &shdr->response); - if (shdr_status || shdr_add_status || rc) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2520 NOP mailbox command failed " - "status x%x add_status x%x mbx " - "status x%x\n", shdr_status, - shdr_add_status, rc); - break; - } - } - - if (rc != MBX_TIMEOUT) - mempool_free(mboxq, phba->mbox_mem_pool); - - return cmdsent; -} - -/** * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space. * @phba: pointer to lpfc hba data structure. * @@ -8503,37 +8491,6 @@ lpfc_unset_hba(struct lpfc_hba *phba) } /** - * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to unset the HBA device initialization steps to - * a device with SLI-4 interface spec. - **/ -static void -lpfc_sli4_unset_hba(struct lpfc_hba *phba) -{ - struct lpfc_vport *vport = phba->pport; - struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - - spin_lock_irq(shost->host_lock); - vport->load_flag |= FC_UNLOADING; - spin_unlock_irq(shost->host_lock); - - phba->pport->work_port_events = 0; - - /* Stop the SLI4 device port */ - lpfc_stop_port(phba); - - lpfc_sli4_disable_intr(phba); - - /* Reset SLI4 HBA FCoE function */ - lpfc_pci_function_reset(phba); - lpfc_sli4_queue_destroy(phba); - - return; -} - -/** * lpfc_sli4_xri_exchange_busy_wait - Wait for device XRI exchange busy * @phba: Pointer to HBA context object. * @@ -9595,7 +9552,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct Scsi_Host *shost = NULL; int error, ret; uint32_t cfg_mode, intr_mode; - int mcnt; int adjusted_fcp_io_channel; /* Allocate memory for HBA structure */ @@ -9684,58 +9640,35 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */ /* Now, trying to enable interrupt and bring up the device */ cfg_mode = phba->cfg_use_msi; - while (true) { - /* Put device to a known state before enabling interrupt */ - lpfc_stop_port(phba); - /* Configure and enable interrupt */ - intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); - if (intr_mode == LPFC_INTR_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0426 Failed to enable interrupt.\n"); - error = -ENODEV; - goto out_free_sysfs_attr; - } - /* Default to single EQ for non-MSI-X */ - if (phba->intr_type != MSIX) - adjusted_fcp_io_channel = 1; - else - adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; - phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; - /* Set up SLI-4 HBA */ - if (lpfc_sli4_hba_setup(phba)) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1421 Failed to set up hba\n"); - error = -ENODEV; - goto out_disable_intr; - } - /* Send NOP mbx cmds for non-INTx mode active interrupt test */ - if (intr_mode != 0) - mcnt = lpfc_sli4_send_nop_mbox_cmds(phba, - LPFC_ACT_INTR_CNT); - - /* Check active interrupts received only for MSI/MSI-X */ - if (intr_mode == 0 || - phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) { - /* Log the current active interrupt mode */ - phba->intr_mode = intr_mode; - lpfc_log_intr_mode(phba, intr_mode); - break; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0451 Configure interrupt mode (%d) " - "failed active interrupt test.\n", - intr_mode); - /* Unset the previous SLI-4 HBA setup. */ - /* - * TODO: Is this operation compatible with IF TYPE 2 - * devices? All port state is deleted and cleared. - */ - lpfc_sli4_unset_hba(phba); - /* Try next level of interrupt mode */ - cfg_mode = --intr_mode; + /* Put device to a known state before enabling interrupt */ + lpfc_stop_port(phba); + /* Configure and enable interrupt */ + intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); + if (intr_mode == LPFC_INTR_ERROR) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0426 Failed to enable interrupt.\n"); + error = -ENODEV; + goto out_free_sysfs_attr; + } + /* Default to single EQ for non-MSI-X */ + if (phba->intr_type != MSIX) + adjusted_fcp_io_channel = 1; + else + adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; + phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; + /* Set up SLI-4 HBA */ + if (lpfc_sli4_hba_setup(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "1421 Failed to set up hba\n"); + error = -ENODEV; + goto out_disable_intr; } + /* Log the current active interrupt mode */ + phba->intr_mode = intr_mode; + lpfc_log_intr_mode(phba, intr_mode); + /* Perform post initialization setup */ lpfc_post_init_setup(phba); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index d8fadcb..46128c6 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1115,6 +1115,13 @@ out: "0261 Cannot Register NameServer login\n"); } + /* + ** In case the node reference counter does not go to zero, ensure that + ** the stale state for the node is not processed. + */ + + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; spin_unlock_irq(shost->host_lock); @@ -2159,13 +2166,16 @@ lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, { struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); cmdiocb = (struct lpfc_iocbq *) arg; rspiocb = cmdiocb->context_un.rsp_iocb; irsp = &rspiocb->iocb; if (irsp->ulpStatus) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; + spin_unlock_irq(shost->host_lock); return NLP_STE_FREED_NODE; } return ndlp->nlp_state; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 60e5a17..98af07c 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -288,6 +288,26 @@ lpfc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) } /** + * lpfc_change_queue_type() - Change a device's scsi tag queuing type + * @sdev: Pointer the scsi device whose queue depth is to change + * @tag_type: Identifier for queue tag type + */ +static int +lpfc_change_queue_type(struct scsi_device *sdev, int tag_type) +{ + if (sdev->tagged_supported) { + scsi_set_tag_type(sdev, tag_type); + if (tag_type) + scsi_activate_tcq(sdev, sdev->queue_depth); + else + scsi_deactivate_tcq(sdev, sdev->queue_depth); + } else + tag_type = 0; + + return tag_type; +} + +/** * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread * @phba: The Hba for which this call is being executed. * @@ -3972,7 +3992,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, break; } } else - fcp_cmnd->fcpCntl1 = 0; + fcp_cmnd->fcpCntl1 = SIMPLE_Q; sli4 = (phba->sli_rev == LPFC_SLI_REV4); @@ -5150,6 +5170,7 @@ struct scsi_host_template lpfc_template = { .max_sectors = 0xFFFF, .vendor_id = LPFC_NL_VENDOR_ID, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; struct scsi_host_template lpfc_vport_template = { @@ -5172,4 +5193,5 @@ struct scsi_host_template lpfc_vport_template = { .shost_attrs = lpfc_vport_attrs, .max_sectors = 0xFFFF, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 624eab3..55b6fc8 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -124,10 +124,17 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe) /* Ring Doorbell */ doorbell.word0 = 0; - bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1); - bf_set(lpfc_wq_doorbell_index, &doorbell, host_index); - bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id); - writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr); + if (q->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index); + bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id); + } else if (q->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, q->db_regaddr); return 0; } @@ -456,10 +463,20 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, /* Ring The Header Receive Queue Doorbell */ if (!(hq->host_index % hq->entry_repost)) { doorbell.word0 = 0; - bf_set(lpfc_rq_doorbell_num_posted, &doorbell, - hq->entry_repost); - bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id); - writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr); + if (hq->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id); + } else if (hq->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_list_fm_index, &doorbell, + hq->host_index); + bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, hq->db_regaddr); } return put_index; } @@ -4939,7 +4956,7 @@ out_free_mboxq: static void lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) { - uint8_t fcp_eqidx; + int fcp_eqidx; lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM); lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM); @@ -5622,6 +5639,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) } /* RPIs. */ count = phba->sli4_hba.max_cfg_param.max_rpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3279 Invalid provisioning of " + "rpi:%d\n", count); + rc = -EINVAL; + goto err_exit; + } base = phba->sli4_hba.max_cfg_param.rpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.rpi_bmask = kzalloc(longs * @@ -5644,6 +5668,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VPIs. */ count = phba->sli4_hba.max_cfg_param.max_vpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3280 Invalid provisioning of " + "vpi:%d\n", count); + rc = -EINVAL; + goto free_rpi_ids; + } base = phba->sli4_hba.max_cfg_param.vpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->vpi_bmask = kzalloc(longs * @@ -5666,6 +5697,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* XRIs. */ count = phba->sli4_hba.max_cfg_param.max_xri; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3281 Invalid provisioning of " + "xri:%d\n", count); + rc = -EINVAL; + goto free_vpi_ids; + } base = phba->sli4_hba.max_cfg_param.xri_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.xri_bmask = kzalloc(longs * @@ -5689,6 +5727,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VFIs. */ count = phba->sli4_hba.max_cfg_param.max_vfi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3282 Invalid provisioning of " + "vfi:%d\n", count); + rc = -EINVAL; + goto free_xri_ids; + } base = phba->sli4_hba.max_cfg_param.vfi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.vfi_bmask = kzalloc(longs * @@ -8370,7 +8415,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, * This is a continuation of a commandi,(CX) so this * sglq is on the active list */ - sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag); + sglq = __lpfc_get_active_sglq(phba, piocb->sli4_lxritag); if (!sglq) return IOCB_ERROR; } @@ -8855,12 +8900,6 @@ lpfc_sli_setup(struct lpfc_hba *phba) pring->prt[3].type = FC_TYPE_CT; pring->prt[3].lpfc_sli_rcv_unsol_event = lpfc_ct_unsol_event; - /* abort unsolicited sequence */ - pring->prt[4].profile = 0; /* Mask 4 */ - pring->prt[4].rctl = FC_RCTL_BA_ABTS; - pring->prt[4].type = FC_TYPE_BLS; - pring->prt[4].lpfc_sli_rcv_unsol_event = - lpfc_sli4_ct_abort_unsol_event; break; } totiocbsize += (pring->sli.sli3.numCiocb * @@ -11873,7 +11912,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; @@ -11975,7 +12014,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) struct lpfc_hba *phba; irqreturn_t hba_irq_rc; bool hba_handled = false; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ phba = (struct lpfc_hba *)dev_id; @@ -12097,6 +12136,54 @@ out_fail: } /** + * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory + * @phba: HBA structure that indicates port to create a queue on. + * @pci_barset: PCI BAR set flag. + * + * This function shall perform iomap of the specified PCI BAR address to host + * memory address if not already done so and return it. The returned host + * memory address can be NULL. + */ +static void __iomem * +lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) +{ + struct pci_dev *pdev; + unsigned long bar_map, bar_map_len; + + if (!phba->pcidev) + return NULL; + else + pdev = phba->pcidev; + + switch (pci_barset) { + case WQ_PCI_BAR_0_AND_1: + if (!phba->pci_bar0_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0); + phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar0_memmap_p; + case WQ_PCI_BAR_2_AND_3: + if (!phba->pci_bar2_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); + phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar2_memmap_p; + case WQ_PCI_BAR_4_AND_5: + if (!phba->pci_bar4_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); + phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar4_memmap_p; + default: + break; + } + return NULL; +} + +/** * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs * @phba: HBA structure that indicates port to create a queue on. * @startq: The starting FCP EQ to modify @@ -12673,6 +12760,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; struct dma_address *page; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!wq || !cq) @@ -12696,6 +12786,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, cq->queue_id); bf_set(lpfc_mbox_hdr_version, &shdr->request, phba->sli4_hba.pc_sli4_params.wqv); + if (phba->sli4_hba.pc_sli4_params.wqv == LPFC_Q_CREATE_VERSION_1) { bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); @@ -12723,6 +12814,10 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, page[dmabuf->buffer_tag].addr_lo = putPaddrLow(dmabuf->phys); page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_wq_create_dua, &wq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12740,6 +12835,47 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, status = -ENXIO; goto out; } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + wq->db_format = bf_get(lpfc_mbx_wq_create_db_format, + &wq_create->u.response); + if ((wq->db_format != LPFC_DB_LIST_FORMAT) && + (wq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3265 WQ[%d] doorbell format not " + "supported: x%x\n", wq->queue_id, + wq->db_format); + status = -EINVAL; + goto out; + } + pci_barset = bf_get(lpfc_mbx_wq_create_bar_set, + &wq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3263 WQ[%d] failed to memmap pci " + "barset:x%x\n", wq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + db_offset = wq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_WQ_DOORBELL) && + (db_offset != LPFC_ULP1_WQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3252 WQ[%d] doorbell offset not " + "supported: x%x\n", wq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + wq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3264 WQ[%d]: barset:x%x, offset:x%x\n", + wq->queue_id, pci_barset, db_offset); + } else { + wq->db_format = LPFC_DB_LIST_FORMAT; + wq->db_regaddr = phba->sli4_hba.WQDBregaddr; + } wq->type = LPFC_WQ; wq->assoc_qid = cq->queue_id; wq->subtype = subtype; @@ -12816,6 +12952,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!hrq || !drq || !cq) @@ -12894,6 +13033,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12911,6 +13053,50 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, status = -ENXIO; goto out; } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + hrq->db_format = bf_get(lpfc_mbx_rq_create_db_format, + &rq_create->u.response); + if ((hrq->db_format != LPFC_DB_LIST_FORMAT) && + (hrq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3262 RQ [%d] doorbell format not " + "supported: x%x\n", hrq->queue_id, + hrq->db_format); + status = -EINVAL; + goto out; + } + + pci_barset = bf_get(lpfc_mbx_rq_create_bar_set, + &rq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3269 RQ[%d] failed to memmap pci " + "barset:x%x\n", hrq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + + db_offset = rq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_RQ_DOORBELL) && + (db_offset != LPFC_ULP1_RQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3270 RQ[%d] doorbell offset not " + "supported: x%x\n", hrq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + hrq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3266 RQ[qid:%d]: barset:x%x, offset:x%x\n", + hrq->queue_id, pci_barset, db_offset); + } else { + hrq->db_format = LPFC_DB_RING_FORMAT; + hrq->db_regaddr = phba->sli4_hba.RQDBregaddr; + } hrq->type = LPFC_HRQ; hrq->assoc_qid = cq->queue_id; hrq->subtype = subtype; @@ -12976,6 +13162,8 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr; @@ -14063,6 +14251,40 @@ lpfc_sli4_abort_partial_seq(struct lpfc_vport *vport, } /** + * lpfc_sli4_abort_ulp_seq - Abort assembled unsol sequence from ulp + * @vport: pointer to a vitural port + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function tries to abort from the assembed sequence from upper level + * protocol, described by the information from basic abbort @dmabuf. It + * checks to see whether such pending context exists at upper level protocol. + * If so, it shall clean up the pending context. + * + * Return + * true -- if there is matching pending context of the sequence cleaned + * at ulp; + * false -- if there is no matching pending context of the sequence present + * at ulp. + **/ +static bool +lpfc_sli4_abort_ulp_seq(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf) +{ + struct lpfc_hba *phba = vport->phba; + int handled; + + /* Accepting abort at ulp with SLI4 only */ + if (phba->sli_rev < LPFC_SLI_REV4) + return false; + + /* Register all caring upper level protocols to attend abort */ + handled = lpfc_ct_handle_unsol_abort(phba, dmabuf); + if (handled) + return true; + + return false; +} + +/** * lpfc_sli4_seq_abort_rsp_cmpl - BLS ABORT RSP seq abort iocb complete handler * @phba: Pointer to HBA context object. * @cmd_iocbq: pointer to the command iocbq structure. @@ -14077,8 +14299,14 @@ lpfc_sli4_seq_abort_rsp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmd_iocbq, struct lpfc_iocbq *rsp_iocbq) { - if (cmd_iocbq) + struct lpfc_nodelist *ndlp; + + if (cmd_iocbq) { + ndlp = (struct lpfc_nodelist *)cmd_iocbq->context1; + lpfc_nlp_put(ndlp); + lpfc_nlp_not_used(ndlp); lpfc_sli_release_iocbq(phba, cmd_iocbq); + } /* Failure means BLS ABORT RSP did not get delivered to remote node*/ if (rsp_iocbq && rsp_iocbq->iocb.ulpStatus) @@ -14118,9 +14346,10 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba, * event after aborting the sequence handling. **/ static void -lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, - struct fc_frame_header *fc_hdr) +lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr, bool aborted) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *ctiocb = NULL; struct lpfc_nodelist *ndlp; uint16_t oxid, rxid, xri, lxri; @@ -14135,12 +14364,27 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, oxid = be16_to_cpu(fc_hdr->fh_ox_id); rxid = be16_to_cpu(fc_hdr->fh_rx_id); - ndlp = lpfc_findnode_did(phba->pport, sid); + ndlp = lpfc_findnode_did(vport, sid); if (!ndlp) { - lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, - "1268 Find ndlp returned NULL for oxid:x%x " - "SID:x%x\n", oxid, sid); - return; + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "1268 Failed to allocate ndlp for " + "oxid:x%x SID:x%x\n", oxid, sid); + return; + } + lpfc_nlp_init(vport, ndlp, sid); + /* Put ndlp onto pport node list */ + lpfc_enqueue_node(vport, ndlp); + } else if (!NLP_CHK_NODE_ACT(ndlp)) { + /* re-setup ndlp without removing from node list */ + ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "3275 Failed to active ndlp found " + "for oxid:x%x SID:x%x\n", oxid, sid); + return; + } } /* Allocate buffer for rsp iocb */ @@ -14164,7 +14408,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, icmd->ulpLe = 1; icmd->ulpClass = CLASS3; icmd->ulpContext = phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; - ctiocb->context1 = ndlp; + ctiocb->context1 = lpfc_nlp_get(ndlp); ctiocb->iocb_cmpl = NULL; ctiocb->vport = phba->pport; @@ -14183,14 +14427,24 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, if (lxri != NO_XRI) lpfc_set_rrq_active(phba, ndlp, lxri, (xri == oxid) ? rxid : oxid, 0); - /* If the oxid maps to the FCP XRI range or if it is out of range, - * send a BLS_RJT. The driver no longer has that exchange. - * Override the IOCB for a BA_RJT. + /* For BA_ABTS from exchange responder, if the logical xri with + * the oxid maps to the FCP XRI range, the port no longer has + * that exchange context, send a BLS_RJT. Override the IOCB for + * a BA_RJT. */ - if (xri > (phba->sli4_hba.max_cfg_param.max_xri + - phba->sli4_hba.max_cfg_param.xri_base) || - xri > (lpfc_sli4_get_els_iocb_cnt(phba) + - phba->sli4_hba.max_cfg_param.xri_base)) { + if ((fctl & FC_FC_EX_CTX) && + (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) { + icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; + bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); + bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); + bf_set(lpfc_rsn_code, &icmd->un.bls_rsp, FC_BA_RJT_UNABLE); + } + + /* If BA_ABTS failed to abort a partially assembled receive sequence, + * the driver no longer has that exchange, send a BLS_RJT. Override + * the IOCB for a BA_RJT. + */ + if (aborted == false) { icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); @@ -14214,17 +14468,19 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, bf_set(lpfc_abts_oxid, &icmd->un.bls_rsp, oxid); /* Xmit CT abts response on exchange <xid> */ - lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0); if (rc == IOCB_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_ELS, - "2925 Failed to issue CT ABTS RSP x%x on " - "xri x%x, Data x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, - phba->link_state); + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "2925 Failed to issue CT ABTS RSP x%x on " + "xri x%x, Data x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, + phba->link_state); + lpfc_nlp_put(ndlp); + ctiocb->context1 = NULL; lpfc_sli_release_iocbq(phba, ctiocb); } } @@ -14249,32 +14505,25 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport, struct lpfc_hba *phba = vport->phba; struct fc_frame_header fc_hdr; uint32_t fctl; - bool abts_par; + bool aborted; /* Make a copy of fc_hdr before the dmabuf being released */ memcpy(&fc_hdr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); fctl = sli4_fctl_from_fc_hdr(&fc_hdr); if (fctl & FC_FC_EX_CTX) { - /* - * ABTS sent by responder to exchange, just free the buffer - */ - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by responder to exchange, no cleanup needed */ + aborted = true; } else { - /* - * ABTS sent by initiator to exchange, need to do cleanup - */ - /* Try to abort partially assembled seq */ - abts_par = lpfc_sli4_abort_partial_seq(vport, dmabuf); - - /* Send abort to ULP if partially seq abort failed */ - if (abts_par == false) - lpfc_sli4_send_seq_to_ulp(vport, dmabuf); - else - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by initiator to exchange, need to do cleanup */ + aborted = lpfc_sli4_abort_partial_seq(vport, dmabuf); + if (aborted == false) + aborted = lpfc_sli4_abort_ulp_seq(vport, dmabuf); } - /* Send basic accept (BA_ACC) to the abort requester */ - lpfc_sli4_seq_abort_rsp(phba, &fc_hdr); + lpfc_in_buf_free(phba, &dmabuf->dbuf); + + /* Respond with BA_ACC or BA_RJT accordingly */ + lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted); } /** @@ -15307,10 +15556,13 @@ lpfc_sli4_fcf_rr_next_index_get(struct lpfc_hba *phba) { uint16_t next_fcf_index; +initial_priority: /* Search start from next bit of currently registered FCF index */ + next_fcf_index = phba->fcf.current_rec.fcf_indx; + next_priority: - next_fcf_index = (phba->fcf.current_rec.fcf_indx + 1) % - LPFC_SLI4_FCF_TBL_INDX_MAX; + /* Determine the next fcf index to check */ + next_fcf_index = (next_fcf_index + 1) % LPFC_SLI4_FCF_TBL_INDX_MAX; next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask, LPFC_SLI4_FCF_TBL_INDX_MAX, next_fcf_index); @@ -15337,7 +15589,7 @@ next_priority: * at that level and continue the selection process. */ if (lpfc_check_next_fcf_pri_level(phba)) - goto next_priority; + goto initial_priority; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, "2844 No roundrobin failover FCF available\n"); if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 44c427a..be02b59 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -139,6 +139,10 @@ struct lpfc_queue { struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */ + uint16_t db_format; +#define LPFC_DB_RING_FORMAT 0x01 +#define LPFC_DB_LIST_FORMAT 0x02 + void __iomem *db_regaddr; /* For q stats */ uint32_t q_cnt_1; uint32_t q_cnt_2; @@ -508,6 +512,10 @@ struct lpfc_sli4_hba { struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */ struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */ + uint8_t fw_func_mode; /* FW function protocol mode */ + uint32_t ulp0_mode; /* ULP0 protocol mode */ + uint32_t ulp1_mode; /* ULP1 protocol mode */ + /* Setup information for various queue parameters */ int eq_esize; int eq_ecount; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index ba596e8..f3b7795 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.3.36" +#define LPFC_DRIVER_VERSION "8.3.37" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index ffd85c5..5e24e7e 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -155,7 +155,7 @@ _base_fault_reset_work(struct work_struct *work) struct task_struct *p; spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->shost_recovery) + if (ioc->shost_recovery || ioc->pci_error_recovery) goto rearm_timer; spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); @@ -164,6 +164,20 @@ _base_fault_reset_work(struct work_struct *work) printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n", ioc->name, __func__); + /* It may be possible that EEH recovery can resolve some of + * pci bus failure issues rather removing the dead ioc function + * by considering controller is in a non-operational state. So + * here priority is given to the EEH recovery. If it doesn't + * not resolve this issue, mpt2sas driver will consider this + * controller to non-operational state and remove the dead ioc + * function. + */ + if (ioc->non_operational_loop++ < 5) { + spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, + flags); + goto rearm_timer; + } + /* * Call _scsih_flush_pending_cmds callback so that we flush all * pending commands back to OS. This call is required to aovid @@ -193,6 +207,8 @@ _base_fault_reset_work(struct work_struct *work) return; /* don't rearm timer */ } + ioc->non_operational_loop = 0; + if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); @@ -4386,6 +4402,7 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc) if (missing_delay[0] != -1 && missing_delay[1] != -1) _base_update_missing_delay(ioc, missing_delay[0], missing_delay[1]); + ioc->non_operational_loop = 0; return 0; diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 543d8d6..c6ee7aa 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -835,6 +835,7 @@ struct MPT2SAS_ADAPTER { u16 cpu_msix_table_sz; u32 ioc_reset_count; MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds; + u32 non_operational_loop; /* internal commands, callback index */ u8 scsi_io_cb_idx; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 04f8010..1836003 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -1310,7 +1309,6 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, void *sg_local, *chain; u32 chain_offset; u32 chain_length; - u32 chain_flags; int sges_left; u32 sges_in_segment; u8 simple_sgl_flags; @@ -1356,8 +1354,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, sges_in_segment--; } - /* initializing the chain flags and pointers */ - chain_flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT << MPI2_SGE_FLAGS_SHIFT; + /* initializing the pointers */ chain_req = _base_get_chain_buffer_tracker(ioc, smid); if (!chain_req) return -1; diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index ce7e59b..1df9ed4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 8af944d..054d523 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -3136,7 +3135,7 @@ _ctl_diag_trigger_mpi_store(struct device *cdev, spin_lock_irqsave(&ioc->diag_trigger_lock, flags); sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count); memset(&ioc->diag_trigger_mpi, 0, - sizeof(struct SL_WH_EVENT_TRIGGERS_T)); + sizeof(ioc->diag_trigger_mpi)); memcpy(&ioc->diag_trigger_mpi, buf, sz); if (ioc->diag_trigger_mpi.ValidEntries > NUM_VALID_ENTRIES) ioc->diag_trigger_mpi.ValidEntries = NUM_VALID_ENTRIES; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6421a06..dcbf7c8 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -2755,13 +2754,11 @@ _scsih_block_io_to_children_attached_directly(struct MPT3SAS_ADAPTER *ioc, int i; u16 handle; u16 reason_code; - u8 phy_number; for (i = 0; i < event_data->NumEntries; i++) { handle = le16_to_cpu(event_data->PHY[i].AttachedDevHandle); if (!handle) continue; - phy_number = event_data->StartPhyNum + i; reason_code = event_data->PHY[i].PhyStatus & MPI2_EVENT_SAS_TOPO_RC_MASK; if (reason_code == MPI2_EVENT_SAS_TOPO_RC_DELAY_NOT_RESPONDING) diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c index da6c5f2..6f8d621 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c +++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> diff --git a/drivers/scsi/qla4xxx/ql4_83xx.c b/drivers/scsi/qla4xxx/ql4_83xx.c index 6e9af20..5d8fe4f 100644 --- a/drivers/scsi/qla4xxx/ql4_83xx.c +++ b/drivers/scsi/qla4xxx/ql4_83xx.c @@ -538,7 +538,7 @@ struct device_info { int port_num; }; -static int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) { uint32_t drv_active; uint32_t dev_part, dev_part1, dev_part2; @@ -1351,31 +1351,58 @@ exit_start_fw: /*----------------------Interrupt Related functions ---------------------*/ -void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_disable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (test_and_clear_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) + qla4_8xxx_intr_disable(ha); +} + +static void qla4_83xx_disable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int, ret; - if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + if (test_and_clear_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + ret = readl(&ha->qla4_83xx_reg->mbox_int); + mb_int = ret & ~INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(1, &ha->qla4_83xx_reg->leg_int_mask); + } +} - ret = readl(&ha->qla4_83xx_reg->mbox_int); - mb_int = ret & ~INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(1, &ha->qla4_83xx_reg->leg_int_mask); +void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_disable_mbox_intrs(ha); + qla4_83xx_disable_iocb_intrs(ha); } -void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_enable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (!test_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) { + qla4_8xxx_intr_enable(ha); + set_bit(AF_83XX_IOCB_INTR_ON, &ha->flags); + } +} + +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int; - qla4_8xxx_mbx_intr_enable(ha); - mb_int = INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(0, &ha->qla4_83xx_reg->leg_int_mask); + if (!test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + mb_int = INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(0, &ha->qla4_83xx_reg->leg_int_mask); + set_bit(AF_83XX_MBOX_INTR_ON, &ha->flags); + } +} - set_bit(AF_INTERRUPTS_ON, &ha->flags); + +void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_enable_mbox_intrs(ha); + qla4_83xx_enable_iocb_intrs(ha); } + void qla4_83xx_queue_mbox_cmd(struct scsi_qla_host *ha, uint32_t *mbx_cmd, int incount) { diff --git a/drivers/scsi/qla4xxx/ql4_attr.c b/drivers/scsi/qla4xxx/ql4_attr.c index 76819b7..19ee55a 100644 --- a/drivers/scsi/qla4xxx/ql4_attr.c +++ b/drivers/scsi/qla4xxx/ql4_attr.c @@ -74,16 +74,22 @@ qla4_8xxx_sysfs_write_fw_dump(struct file *filep, struct kobject *kobj, } break; case 2: - /* Reset HBA */ + /* Reset HBA and collect FW dump */ ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, QLA8XXX_CRB_DEV_STATE); if (dev_state == QLA8XXX_DEV_READY) { - ql4_printk(KERN_INFO, ha, - "%s: Setting Need reset, reset_owner is 0x%x.\n", - __func__, ha->func_num); + ql4_printk(KERN_INFO, ha, "%s: Setting Need reset\n", + __func__); qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DEV_STATE, QLA8XXX_DEV_NEED_RESET); - set_bit(AF_8XXX_RST_OWNER, &ha->flags); + if (is_qla8022(ha) || + (is_qla8032(ha) && + qla4_83xx_can_perform_reset(ha))) { + set_bit(AF_8XXX_RST_OWNER, &ha->flags); + set_bit(AF_FW_RECOVERY, &ha->flags); + ql4_printk(KERN_INFO, ha, "%s: Reset owner is 0x%x\n", + __func__, ha->func_num); + } } else ql4_printk(KERN_INFO, ha, "%s: Reset not performed as device state is 0x%x\n", diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index 329d553..129f5dd 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -136,6 +136,7 @@ #define RESPONSE_QUEUE_DEPTH 64 #define QUEUE_SIZE 64 #define DMA_BUFFER_SIZE 512 +#define IOCB_HIWAT_CUSHION 4 /* * Misc @@ -180,6 +181,7 @@ #define DISABLE_ACB_TOV 30 #define IP_CONFIG_TOV 30 #define LOGIN_TOV 12 +#define BOOT_LOGIN_RESP_TOV 60 #define MAX_RESET_HA_RETRIES 2 #define FW_ALIVE_WAIT_TOV 3 @@ -314,6 +316,7 @@ struct ql4_tuple_ddb { * DDB flags. */ #define DF_RELOGIN 0 /* Relogin to device */ +#define DF_BOOT_TGT 1 /* Boot target entry */ #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */ #define DF_FO_MASKED 3 @@ -501,6 +504,7 @@ struct scsi_qla_host { #define AF_INTERRUPTS_ON 6 /* 0x00000040 */ #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */ #define AF_LINK_UP 8 /* 0x00000100 */ +#define AF_LOOPBACK 9 /* 0x00000200 */ #define AF_IRQ_ATTACHED 10 /* 0x00000400 */ #define AF_DISABLE_ACB_COMPLETE 11 /* 0x00000800 */ #define AF_HA_REMOVAL 12 /* 0x00001000 */ @@ -516,6 +520,8 @@ struct scsi_qla_host { #define AF_8XXX_RST_OWNER 25 /* 0x02000000 */ #define AF_82XX_DUMP_READING 26 /* 0x04000000 */ #define AF_83XX_NO_FW_DUMP 27 /* 0x08000000 */ +#define AF_83XX_IOCB_INTR_ON 28 /* 0x10000000 */ +#define AF_83XX_MBOX_INTR_ON 29 /* 0x20000000 */ unsigned long dpc_flags; @@ -537,6 +543,7 @@ struct scsi_qla_host { uint32_t tot_ddbs; uint16_t iocb_cnt; + uint16_t iocb_hiwat; /* SRB cache. */ #define SRB_MIN_REQ 128 @@ -838,7 +845,8 @@ static inline int is_aer_supported(struct scsi_qla_host *ha) static inline int adapter_up(struct scsi_qla_host *ha) { return (test_bit(AF_ONLINE, &ha->flags) != 0) && - (test_bit(AF_LINK_UP, &ha->flags) != 0); + (test_bit(AF_LINK_UP, &ha->flags) != 0) && + (!test_bit(AF_LOOPBACK, &ha->flags)); } static inline struct scsi_qla_host* to_qla_host(struct Scsi_Host *shost) diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h index 1c47950..ad9d2e2 100644 --- a/drivers/scsi/qla4xxx/ql4_fw.h +++ b/drivers/scsi/qla4xxx/ql4_fw.h @@ -495,7 +495,7 @@ struct qla_flt_region { #define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED 0x802D #define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD 0x802E #define MBOX_ASTS_IDC_COMPLETE 0x8100 -#define MBOX_ASTS_IDC_NOTIFY 0x8101 +#define MBOX_ASTS_IDC_REQUEST_NOTIFICATION 0x8101 #define MBOX_ASTS_TXSCVR_INSERTED 0x8130 #define MBOX_ASTS_TXSCVR_REMOVED 0x8131 @@ -522,6 +522,10 @@ struct qla_flt_region { #define FLASH_OPT_COMMIT 2 #define FLASH_OPT_RMW_COMMIT 3 +/* Loopback type */ +#define ENABLE_INTERNAL_LOOPBACK 0x04 +#define ENABLE_EXTERNAL_LOOPBACK 0x08 + /*************************************************************************/ /* Host Adapter Initialization Control Block (from host) */ diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h index 57a5a3c..982293e 100644 --- a/drivers/scsi/qla4xxx/ql4_glbl.h +++ b/drivers/scsi/qla4xxx/ql4_glbl.h @@ -253,12 +253,14 @@ void qla4_8xxx_set_rst_ready(struct scsi_qla_host *ha); void qla4_8xxx_clear_rst_ready(struct scsi_qla_host *ha); int qla4_8xxx_device_bootstrap(struct scsi_qla_host *ha); void qla4_8xxx_get_minidump(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha); int qla4_8xxx_set_param(struct scsi_qla_host *ha, int param); int qla4_8xxx_update_idc_reg(struct scsi_qla_host *ha); int qla4_83xx_post_idc_ack(struct scsi_qla_host *ha); void qla4_83xx_disable_pause(struct scsi_qla_host *ha); +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha); +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha); extern int ql4xextended_error_logging; extern int ql4xdontresethba; diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c index 1aca1b4..8fc8548 100644 --- a/drivers/scsi/qla4xxx/ql4_init.c +++ b/drivers/scsi/qla4xxx/ql4_init.c @@ -195,12 +195,10 @@ exit_get_sys_info_no_free: * @ha: pointer to host adapter structure. * **/ -static int qla4xxx_init_local_data(struct scsi_qla_host *ha) +static void qla4xxx_init_local_data(struct scsi_qla_host *ha) { /* Initialize aen queue */ ha->aen_q_count = MAX_AEN_ENTRIES; - - return qla4xxx_get_firmware_status(ha); } static uint8_t @@ -935,14 +933,23 @@ int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset) if (ha->isp_ops->start_firmware(ha) == QLA_ERROR) goto exit_init_hba; + /* + * For ISP83XX, mailbox and IOCB interrupts are enabled separately. + * Mailbox interrupts must be enabled prior to issuing any mailbox + * command in order to prevent the possibility of losing interrupts + * while switching from polling to interrupt mode. IOCB interrupts are + * enabled via isp_ops->enable_intrs. + */ + if (is_qla8032(ha)) + qla4_83xx_enable_mbox_intrs(ha); + if (qla4xxx_about_firmware(ha) == QLA_ERROR) goto exit_init_hba; if (ha->isp_ops->get_sys_info(ha) == QLA_ERROR) goto exit_init_hba; - if (qla4xxx_init_local_data(ha) == QLA_ERROR) - goto exit_init_hba; + qla4xxx_init_local_data(ha); status = qla4xxx_init_firmware(ha); if (status == QLA_ERROR) diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c index f48f37a..14fec97 100644 --- a/drivers/scsi/qla4xxx/ql4_iocb.c +++ b/drivers/scsi/qla4xxx/ql4_iocb.c @@ -316,7 +316,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) goto queuing_error; /* total iocbs active */ - if ((ha->iocb_cnt + req_cnt) >= REQUEST_QUEUE_DEPTH) + if ((ha->iocb_cnt + req_cnt) >= ha->iocb_hiwat) goto queuing_error; /* Build command packet */ diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c index 15ea814..1b83dc2 100644 --- a/drivers/scsi/qla4xxx/ql4_isr.c +++ b/drivers/scsi/qla4xxx/ql4_isr.c @@ -582,6 +582,33 @@ exit_prq_error: } /** + * qla4_83xx_loopback_in_progress: Is loopback in progress? + * @ha: Pointer to host adapter structure. + * @ret: 1 = loopback in progress, 0 = loopback not in progress + **/ +static int qla4_83xx_loopback_in_progress(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if ((ha->idc_info.info2 & ENABLE_INTERNAL_LOOPBACK) || + (ha->idc_info.info2 & ENABLE_EXTERNAL_LOOPBACK)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics in progress\n", + __func__)); + rval = 1; + } else { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics not in progress\n", + __func__)); + rval = 0; + } + } + + return rval; +} + +/** * qla4xxx_isr_decode_mailbox - decodes mailbox status * @ha: Pointer to host adapter structure. * @mailbox_status: Mailbox status. @@ -676,8 +703,10 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, case MBOX_ASTS_LINK_DOWN: clear_bit(AF_LINK_UP, &ha->flags); - if (test_bit(AF_INIT_DONE, &ha->flags)) + if (test_bit(AF_INIT_DONE, &ha->flags)) { set_bit(DPC_LINK_CHANGED, &ha->dpc_flags); + qla4xxx_wake_dpc(ha); + } ql4_printk(KERN_INFO, ha, "%s: LINK DOWN\n", __func__); qla4xxx_post_aen_work(ha, ISCSI_EVENT_LINKDOWN, @@ -806,7 +835,7 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, " removed\n", ha->host_no, mbox_sts[0])); break; - case MBOX_ASTS_IDC_NOTIFY: + case MBOX_ASTS_IDC_REQUEST_NOTIFICATION: { uint32_t opcode; if (is_qla8032(ha)) { @@ -840,6 +869,11 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, DEBUG2(ql4_printk(KERN_INFO, ha, "scsi:%ld: AEN %04x IDC Complete notification\n", ha->host_no, mbox_sts[0])); + + if (qla4_83xx_loopback_in_progress(ha)) + set_bit(AF_LOOPBACK, &ha->flags); + else + clear_bit(AF_LOOPBACK, &ha->flags); } break; @@ -1124,17 +1158,18 @@ irqreturn_t qla4_83xx_intr_handler(int irq, void *dev_id) /* Legacy interrupt is valid if bit31 of leg_int_ptr is set */ if (!(leg_int_ptr & LEG_INT_PTR_B31)) { - ql4_printk(KERN_ERR, ha, - "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", - __func__); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", + __func__)); return IRQ_NONE; } /* Validate the PCIE function ID set in leg_int_ptr bits [19..16] */ if ((leg_int_ptr & PF_BITS_MASK) != ha->pf_bit) { - ql4_printk(KERN_ERR, ha, - "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", - __func__, (leg_int_ptr & PF_BITS_MASK), ha->pf_bit); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", + __func__, (leg_int_ptr & PF_BITS_MASK), + ha->pf_bit)); return IRQ_NONE; } @@ -1437,11 +1472,14 @@ irq_not_attached: void qla4xxx_free_irqs(struct scsi_qla_host *ha) { - if (test_bit(AF_MSIX_ENABLED, &ha->flags)) - qla4_8xxx_disable_msix(ha); - else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { - free_irq(ha->pdev->irq, ha); - pci_disable_msi(ha->pdev); - } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) - free_irq(ha->pdev->irq, ha); + if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) { + if (test_bit(AF_MSIX_ENABLED, &ha->flags)) { + qla4_8xxx_disable_msix(ha); + } else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + pci_disable_msi(ha->pdev); + } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + } + } } diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 3d41034..81e738d 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -44,6 +44,30 @@ void qla4xxx_process_mbox_intr(struct scsi_qla_host *ha, int out_count) } /** + * qla4xxx_is_intr_poll_mode – Are we allowed to poll for interrupts? + * @ha: Pointer to host adapter structure. + * @ret: 1=polling mode, 0=non-polling mode + **/ +static int qla4xxx_is_intr_poll_mode(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) + rval = 0; + } else { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_INTERRUPTS_ON, &ha->flags) && + test_bit(AF_ONLINE, &ha->flags) && + !test_bit(AF_HA_REMOVAL, &ha->flags)) + rval = 0; + } + + return rval; +} + +/** * qla4xxx_mailbox_command - issues mailbox commands * @ha: Pointer to host adapter structure. * @inCount: number of mailbox registers to load. @@ -153,33 +177,28 @@ int qla4xxx_mailbox_command(struct scsi_qla_host *ha, uint8_t inCount, /* * Wait for completion: Poll or completion queue */ - if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && - test_bit(AF_INTERRUPTS_ON, &ha->flags) && - test_bit(AF_ONLINE, &ha->flags) && - !test_bit(AF_HA_REMOVAL, &ha->flags)) { - /* Do not poll for completion. Use completion queue */ - set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); - clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - } else { + if (qla4xxx_is_intr_poll_mode(ha)) { /* Poll for command to complete */ wait_count = jiffies + MBOX_TOV * HZ; while (test_bit(AF_MBOX_COMMAND_DONE, &ha->flags) == 0) { if (time_after_eq(jiffies, wait_count)) break; - /* * Service the interrupt. * The ISR will save the mailbox status registers * to a temporary storage location in the adapter * structure. */ - spin_lock_irqsave(&ha->hardware_lock, flags); ha->isp_ops->process_mailbox_interrupt(ha, outCount); spin_unlock_irqrestore(&ha->hardware_lock, flags); msleep(10); } + } else { + /* Do not poll for completion. Use completion queue */ + set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); + wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); + clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); } /* Check for mailbox timeout. */ @@ -678,8 +697,24 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha) return QLA_ERROR; } - ql4_printk(KERN_INFO, ha, "%ld firmware IOCBs available (%d).\n", - ha->host_no, mbox_sts[2]); + /* High-water mark of IOCBs */ + ha->iocb_hiwat = mbox_sts[2]; + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: firmware IOCBs available = %d\n", __func__, + ha->iocb_hiwat)); + + if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION) + ha->iocb_hiwat -= IOCB_HIWAT_CUSHION; + + /* Ideally, we should not enter this code, as the # of firmware + * IOCBs is hard-coded in the firmware. We set a default + * iocb_hiwat here just in case */ + if (ha->iocb_hiwat == 0) { + ha->iocb_hiwat = REQUEST_QUEUE_DEPTH / 4; + DEBUG2(ql4_printk(KERN_WARNING, ha, + "%s: Setting IOCB's to = %d\n", __func__, + ha->iocb_hiwat)); + } return QLA_SUCCESS; } diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c index 499a92d..71d3d23 100644 --- a/drivers/scsi/qla4xxx/ql4_nx.c +++ b/drivers/scsi/qla4xxx/ql4_nx.c @@ -2986,7 +2986,7 @@ int qla4_8xxx_load_risc(struct scsi_qla_host *ha) retval = qla4_8xxx_device_state_handler(ha); - if (retval == QLA_SUCCESS && !test_bit(AF_INIT_DONE, &ha->flags)) + if (retval == QLA_SUCCESS && !test_bit(AF_IRQ_ATTACHED, &ha->flags)) retval = qla4xxx_request_irqs(ha); return retval; @@ -3427,11 +3427,11 @@ int qla4_8xxx_get_sys_info(struct scsi_qla_host *ha) } /* Make sure we receive the minimum required data to cache internally */ - if (mbox_sts[4] < offsetof(struct mbx_sys_info, reserved)) { + if ((is_qla8032(ha) ? mbox_sts[3] : mbox_sts[4]) < + offsetof(struct mbx_sys_info, reserved)) { DEBUG2(printk("scsi%ld: %s: GET_SYS_INFO data receive" " error (%x)\n", ha->host_no, __func__, mbox_sts[4])); goto exit_validate_mac82; - } /* Save M.A.C. address & serial_number */ @@ -3463,7 +3463,7 @@ exit_validate_mac82: /* Interrupt handling helpers. */ -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3484,7 +3484,7 @@ int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) return QLA_SUCCESS; } -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3509,7 +3509,7 @@ int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) void qla4_82xx_enable_intrs(struct scsi_qla_host *ha) { - qla4_8xxx_mbx_intr_enable(ha); + qla4_8xxx_intr_enable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - reset */ @@ -3522,7 +3522,7 @@ void qla4_82xx_disable_intrs(struct scsi_qla_host *ha) { if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + qla4_8xxx_intr_disable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - set */ diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 4cec123..6142729 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1337,18 +1337,18 @@ static int qla4xxx_session_get_param(struct iscsi_cls_session *cls_sess, sess->password_in, BIDI_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; case ISCSI_PARAM_CHAP_OUT_IDX: rval = qla4xxx_get_chap_index(ha, sess->username, sess->password, LOCAL_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; default: return iscsi_session_get_param(cls_sess, param, buf); @@ -2242,6 +2242,7 @@ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags) || !test_bit(AF_ONLINE, &ha->flags) || !test_bit(AF_LINK_UP, &ha->flags) || + test_bit(AF_LOOPBACK, &ha->flags) || test_bit(DPC_RESET_HA_FW_CONTEXT, &ha->dpc_flags)) goto qc_host_busy; @@ -2978,6 +2979,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha) if (status == QLA_SUCCESS) { if (!test_bit(AF_FW_RECOVERY, &ha->flags)) qla4xxx_cmd_wait(ha); + ha->isp_ops->disable_intrs(ha); qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); qla4xxx_abort_active_cmds(ha, DID_RESET << 16); @@ -3479,7 +3481,8 @@ dpc_post_reset_ha: } /* ---- link change? --- */ - if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { + if (!test_bit(AF_LOOPBACK, &ha->flags) && + test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { if (!test_bit(AF_LINK_UP, &ha->flags)) { /* ---- link down? --- */ qla4xxx_mark_all_devices_missing(ha); @@ -3508,10 +3511,8 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) { qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16); - if (test_bit(AF_INTERRUPTS_ON, &ha->flags)) { - /* Turn-off interrupts on the card. */ - ha->isp_ops->disable_intrs(ha); - } + /* Turn-off interrupts on the card. */ + ha->isp_ops->disable_intrs(ha); if (is_qla40XX(ha)) { writel(set_rmask(CSR_SCSI_PROCESSOR_INTR), @@ -3547,8 +3548,7 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) } /* Detach interrupts */ - if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) - qla4xxx_free_irqs(ha); + qla4xxx_free_irqs(ha); /* free extra memory */ qla4xxx_mem_free(ha); @@ -4687,7 +4687,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, struct iscsi_endpoint *ep; struct sockaddr_in *addr; struct sockaddr_in6 *addr6; - struct sockaddr *dst_addr; + struct sockaddr *t_addr; + struct sockaddr_storage *dst_addr; char *ip; /* TODO: need to destroy on unload iscsi_endpoint*/ @@ -4696,21 +4697,23 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, return NULL; if (fw_ddb_entry->options & DDB_OPT_IPV6_DEVICE) { - dst_addr->sa_family = AF_INET6; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET6; addr6 = (struct sockaddr_in6 *)dst_addr; ip = (char *)&addr6->sin6_addr; memcpy(ip, fw_ddb_entry->ip_addr, IPv6_ADDR_LEN); addr6->sin6_port = htons(le16_to_cpu(fw_ddb_entry->port)); } else { - dst_addr->sa_family = AF_INET; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET; addr = (struct sockaddr_in *)dst_addr; ip = (char *)&addr->sin_addr; memcpy(ip, fw_ddb_entry->ip_addr, IP_ADDR_LEN); addr->sin_port = htons(le16_to_cpu(fw_ddb_entry->port)); } - ep = qla4xxx_ep_connect(ha->host, dst_addr, 0); + ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0); vfree(dst_addr); return ep; } @@ -4725,7 +4728,8 @@ static int qla4xxx_verify_boot_idx(struct scsi_qla_host *ha, uint16_t idx) } static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, - struct ddb_entry *ddb_entry) + struct ddb_entry *ddb_entry, + uint16_t idx) { uint16_t def_timeout; @@ -4745,6 +4749,10 @@ static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, def_timeout : LOGIN_TOV; ddb_entry->default_time2wait = le16_to_cpu(ddb_entry->fw_ddb_entry.iscsi_def_time2wait); + + if (ql4xdisablesysfsboot && + (idx == ha->pri_ddb_idx || idx == ha->sec_ddb_idx)) + set_bit(DF_BOOT_TGT, &ddb_entry->flags); } static void qla4xxx_wait_for_ip_configuration(struct scsi_qla_host *ha) @@ -4881,7 +4889,7 @@ static void qla4xxx_remove_failed_ddb(struct scsi_qla_host *ha, static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, struct dev_db_entry *fw_ddb_entry, - int is_reset) + int is_reset, uint16_t idx) { struct iscsi_cls_session *cls_sess; struct iscsi_session *sess; @@ -4919,7 +4927,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, memcpy(&ddb_entry->fw_ddb_entry, fw_ddb_entry, sizeof(struct dev_db_entry)); - qla4xxx_setup_flash_ddb_entry(ha, ddb_entry); + qla4xxx_setup_flash_ddb_entry(ha, ddb_entry, idx); cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn), conn_id); @@ -5036,7 +5044,7 @@ static void qla4xxx_build_nt_list(struct scsi_qla_host *ha, goto continue_next_nt; } - ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset); + ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset, idx); if (ret == QLA_ERROR) goto exit_nt_list; @@ -5116,6 +5124,78 @@ void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset) } /** + * qla4xxx_wait_login_resp_boot_tgt - Wait for iSCSI boot target login + * response. + * @ha: pointer to adapter structure + * + * When the boot entry is normal iSCSI target then DF_BOOT_TGT flag will be + * set in DDB and we will wait for login response of boot targets during + * probe. + **/ +static void qla4xxx_wait_login_resp_boot_tgt(struct scsi_qla_host *ha) +{ + struct ddb_entry *ddb_entry; + struct dev_db_entry *fw_ddb_entry = NULL; + dma_addr_t fw_ddb_entry_dma; + unsigned long wtime; + uint32_t ddb_state; + int max_ddbs, idx, ret; + + max_ddbs = is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX : + MAX_DEV_DB_ENTRIES; + + fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + &fw_ddb_entry_dma, GFP_KERNEL); + if (!fw_ddb_entry) { + ql4_printk(KERN_ERR, ha, + "%s: Unable to allocate dma buffer\n", __func__); + goto exit_login_resp; + } + + wtime = jiffies + (HZ * BOOT_LOGIN_RESP_TOV); + + for (idx = 0; idx < max_ddbs; idx++) { + ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx); + if (ddb_entry == NULL) + continue; + + if (test_bit(DF_BOOT_TGT, &ddb_entry->flags)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: DDB index [%d]\n", __func__, + ddb_entry->fw_ddb_index)); + do { + ret = qla4xxx_get_fwddb_entry(ha, + ddb_entry->fw_ddb_index, + fw_ddb_entry, fw_ddb_entry_dma, + NULL, NULL, &ddb_state, NULL, + NULL, NULL); + if (ret == QLA_ERROR) + goto exit_login_resp; + + if ((ddb_state == DDB_DS_SESSION_ACTIVE) || + (ddb_state == DDB_DS_SESSION_FAILED)) + break; + + schedule_timeout_uninterruptible(HZ); + + } while ((time_after(wtime, jiffies))); + + if (!time_after(wtime, jiffies)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Login response wait timer expired\n", + __func__)); + goto exit_login_resp; + } + } + } + +exit_login_resp: + if (fw_ddb_entry) + dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + fw_ddb_entry, fw_ddb_entry_dma); +} + +/** * qla4xxx_probe_adapter - callback function to probe HBA * @pdev: pointer to pci_dev structure * @pci_device_id: pointer to pci_device entry @@ -5270,7 +5350,7 @@ static int qla4xxx_probe_adapter(struct pci_dev *pdev, if (is_qla80XX(ha)) { ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, - QLA82XX_CRB_DEV_STATE); + QLA8XXX_CRB_DEV_STATE); ha->isp_ops->idc_unlock(ha); if (dev_state == QLA8XXX_DEV_FAILED) { ql4_printk(KERN_WARNING, ha, "%s: don't retry " @@ -5368,6 +5448,7 @@ skip_retry_init: /* Perform the build ddb list and login to each */ qla4xxx_build_ddb_list(ha, INIT_ADAPTER); iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb); + qla4xxx_wait_login_resp_boot_tgt(ha); qla4xxx_create_chap_list(ha); @@ -6008,14 +6089,6 @@ static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type) goto exit_host_reset; } - rval = qla4xxx_wait_for_hba_online(ha); - if (rval != QLA_SUCCESS) { - DEBUG2(ql4_printk(KERN_INFO, ha, "%s: Unable to reset host " - "adapter\n", __func__)); - rval = -EIO; - goto exit_host_reset; - } - if (test_bit(DPC_RESET_HA, &ha->dpc_flags)) goto recover_adapter; @@ -6115,7 +6188,6 @@ qla4xxx_pci_mmio_enabled(struct pci_dev *pdev) static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) { uint32_t rval = QLA_ERROR; - uint32_t ret = 0; int fn; struct pci_dev *other_pdev = NULL; @@ -6201,16 +6273,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DRV_STATE, 0); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to " - "reserve interrupt %d already in use.\n", - ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } + ha->isp_ops->enable_intrs(ha); } } else { ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not " @@ -6220,18 +6283,9 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) QLA8XXX_DEV_READY)) { clear_bit(AF_FW_RECOVERY, &ha->flags); rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER); - if (rval == QLA_SUCCESS) { - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to" - " reserve interrupt %d already in" - " use.\n", ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } - } + if (rval == QLA_SUCCESS) + ha->isp_ops->enable_intrs(ha); + ha->isp_ops->idc_lock(ha); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h index f6df2ea..6775a45 100644 --- a/drivers/scsi/qla4xxx/ql4_version.h +++ b/drivers/scsi/qla4xxx/ql4_version.h @@ -5,4 +5,4 @@ * See LICENSE.qla4xxx for copyright and licensing details. */ -#define QLA4XXX_DRIVER_VERSION "5.03.00-k1" +#define QLA4XXX_DRIVER_VERSION "5.03.00-k4" diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 59d427b..0a74b97 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2503,6 +2503,15 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr, } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); +static ssize_t +show_priv_session_target_id(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + return sprintf(buf, "%d\n", session->target_id); +} +static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, + show_priv_session_target_id, NULL); #define iscsi_priv_session_attr_show(field, format) \ static ssize_t \ @@ -2575,6 +2584,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, + &dev_attr_priv_sess_target_id.attr, NULL, }; @@ -2638,6 +2648,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_id.attr) + return S_IRUGO; else { WARN_ONCE(1, "Invalid session attr"); return 0; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index be2c9a6..9f0c465 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1391,24 +1391,23 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) return ERR_PTR(-ENOMEM); } - if (!idr_pre_get(&sg_index_idr, GFP_KERNEL)) { - printk(KERN_WARNING "idr expansion Sg_device failure\n"); - error = -ENOMEM; - goto out; - } - + idr_preload(GFP_KERNEL); write_lock_irqsave(&sg_index_lock, iflags); - error = idr_get_new(&sg_index_idr, sdp, &k); - if (error) { - write_unlock_irqrestore(&sg_index_lock, iflags); - printk(KERN_WARNING "idr allocation Sg_device failure: %d\n", - error); - goto out; + error = idr_alloc(&sg_index_idr, sdp, 0, SG_MAX_DEVS, GFP_NOWAIT); + if (error < 0) { + if (error == -ENOSPC) { + sdev_printk(KERN_WARNING, scsidp, + "Unable to attach sg device type=%d, minor number exceeds %d\n", + scsidp->type, SG_MAX_DEVS - 1); + error = -ENODEV; + } else { + printk(KERN_WARNING + "idr allocation Sg_device failure: %d\n", error); + } + goto out_unlock; } - - if (unlikely(k >= SG_MAX_DEVS)) - goto overflow; + k = error; SCSI_LOG_TIMEOUT(3, printk("sg_alloc: dev=%d \n", k)); sprintf(disk->disk_name, "sg%d", k); @@ -1420,25 +1419,17 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) sdp->sg_tablesize = queue_max_segments(q); sdp->index = k; kref_init(&sdp->d_ref); + error = 0; +out_unlock: write_unlock_irqrestore(&sg_index_lock, iflags); + idr_preload_end(); - error = 0; - out: if (error) { kfree(sdp); return ERR_PTR(error); } return sdp; - - overflow: - idr_remove(&sg_index_idr, k); - write_unlock_irqrestore(&sg_index_lock, iflags); - sdev_printk(KERN_WARNING, scsidp, - "Unable to attach sg device type=%d, minor " - "number exceeds %d\n", scsidp->type, SG_MAX_DEVS - 1); - error = -ENODEV; - goto out; } static int diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 3e2b371..8697447 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4076,7 +4076,7 @@ static int st_probe(struct device *dev) struct st_modedef *STm; struct st_partstat *STps; struct st_buffer *buffer; - int i, dev_num, error; + int i, error; char *stp; if (SDp->type != TYPE_TAPE) @@ -4178,27 +4178,17 @@ static int st_probe(struct device *dev) tpnt->blksize_changed = 0; mutex_init(&tpnt->lock); - if (!idr_pre_get(&st_index_idr, GFP_KERNEL)) { - pr_warn("st: idr expansion failed\n"); - error = -ENOMEM; - goto out_put_disk; - } - + idr_preload(GFP_KERNEL); spin_lock(&st_index_lock); - error = idr_get_new(&st_index_idr, tpnt, &dev_num); + error = idr_alloc(&st_index_idr, tpnt, 0, ST_MAX_TAPES + 1, GFP_NOWAIT); spin_unlock(&st_index_lock); - if (error) { + idr_preload_end(); + if (error < 0) { pr_warn("st: idr allocation failed: %d\n", error); goto out_put_disk; } - - if (dev_num > ST_MAX_TAPES) { - pr_err("st: Too many tape devices (max. %d).\n", ST_MAX_TAPES); - goto out_put_index; - } - - tpnt->index = dev_num; - sprintf(disk->disk_name, "st%d", dev_num); + tpnt->index = error; + sprintf(disk->disk_name, "st%d", tpnt->index); dev_set_drvdata(dev, tpnt); @@ -4218,9 +4208,8 @@ static int st_probe(struct device *dev) out_remove_devs: remove_cdevs(tpnt); -out_put_index: spin_lock(&st_index_lock); - idr_remove(&st_index_idr, dev_num); + idr_remove(&st_index_idr, tpnt->index); spin_unlock(&st_index_lock); out_put_disk: put_disk(disk); diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 538ebe2..24456a0 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -2880,7 +2880,6 @@ static int binder_release(struct inode *nodp, struct file *filp) static void binder_deferred_release(struct binder_proc *proc) { - struct hlist_node *pos; struct binder_transaction *t; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, buffers, active_transactions, page_count; @@ -2924,7 +2923,7 @@ static void binder_deferred_release(struct binder_proc *proc) node->local_weak_refs = 0; hlist_add_head(&node->dead_node, &binder_dead_nodes); - hlist_for_each_entry(ref, pos, &node->refs, node_entry) { + hlist_for_each_entry(ref, &node->refs, node_entry) { incoming_refs++; if (ref->death) { death++; @@ -3156,12 +3155,11 @@ static void print_binder_thread(struct seq_file *m, static void print_binder_node(struct seq_file *m, struct binder_node *node) { struct binder_ref *ref; - struct hlist_node *pos; struct binder_work *w; int count; count = 0; - hlist_for_each_entry(ref, pos, &node->refs, node_entry) + hlist_for_each_entry(ref, &node->refs, node_entry) count++; seq_printf(m, " node %d: u%p c%p hs %d hw %d ls %d lw %d is %d iw %d", @@ -3171,7 +3169,7 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) node->internal_strong_refs, count); if (count) { seq_puts(m, " proc"); - hlist_for_each_entry(ref, pos, &node->refs, node_entry) + hlist_for_each_entry(ref, &node->refs, node_entry) seq_printf(m, " %d", ref->proc->pid); } seq_puts(m, "\n"); @@ -3369,7 +3367,6 @@ static void print_binder_proc_stats(struct seq_file *m, static int binder_state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; struct binder_node *node; int do_lock = !binder_debug_no_lock; @@ -3380,10 +3377,10 @@ static int binder_state_show(struct seq_file *m, void *unused) if (!hlist_empty(&binder_dead_nodes)) seq_puts(m, "dead nodes:\n"); - hlist_for_each_entry(node, pos, &binder_dead_nodes, dead_node) + hlist_for_each_entry(node, &binder_dead_nodes, dead_node) print_binder_node(m, node); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 1); if (do_lock) binder_unlock(__func__); @@ -3393,7 +3390,6 @@ static int binder_state_show(struct seq_file *m, void *unused) static int binder_stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; int do_lock = !binder_debug_no_lock; if (do_lock) @@ -3403,7 +3399,7 @@ static int binder_stats_show(struct seq_file *m, void *unused) print_binder_stats(m, "", &binder_stats); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc_stats(m, proc); if (do_lock) binder_unlock(__func__); @@ -3413,14 +3409,13 @@ static int binder_stats_show(struct seq_file *m, void *unused) static int binder_transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; int do_lock = !binder_debug_no_lock; if (do_lock) binder_lock(__func__); seq_puts(m, "binder transactions:\n"); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 0); if (do_lock) binder_unlock(__func__); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 23a98e6..9435a3d 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -144,23 +144,24 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) spin_lock_init(&tiqn->login_stats.lock); spin_lock_init(&tiqn->logout_stats.lock); - if (!idr_pre_get(&tiqn_idr, GFP_KERNEL)) { - pr_err("idr_pre_get() for tiqn_idr failed\n"); - kfree(tiqn); - return ERR_PTR(-ENOMEM); - } tiqn->tiqn_state = TIQN_STATE_ACTIVE; + idr_preload(GFP_KERNEL); spin_lock(&tiqn_lock); - ret = idr_get_new(&tiqn_idr, NULL, &tiqn->tiqn_index); + + ret = idr_alloc(&tiqn_idr, NULL, 0, 0, GFP_NOWAIT); if (ret < 0) { - pr_err("idr_get_new() failed for tiqn->tiqn_index\n"); + pr_err("idr_alloc() failed for tiqn->tiqn_index\n"); spin_unlock(&tiqn_lock); + idr_preload_end(); kfree(tiqn); return ERR_PTR(ret); } + tiqn->tiqn_index = ret; list_add_tail(&tiqn->tiqn_list, &g_tiqn_list); + spin_unlock(&tiqn_lock); + idr_preload_end(); pr_debug("CORE[0] - Added iSCSI Target IQN: %s\n", tiqn->tiqn); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index fdb632f..2535d4d 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -247,19 +247,16 @@ static int iscsi_login_zero_tsih_s1( spin_lock_init(&sess->session_usage_lock); spin_lock_init(&sess->ttt_lock); - if (!idr_pre_get(&sess_idr, GFP_KERNEL)) { - pr_err("idr_pre_get() for sess_idr failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - kfree(sess); - return -ENOMEM; - } + idr_preload(GFP_KERNEL); spin_lock_bh(&sess_idr_lock); - ret = idr_get_new(&sess_idr, NULL, &sess->session_index); + ret = idr_alloc(&sess_idr, NULL, 0, 0, GFP_NOWAIT); + if (ret >= 0) + sess->session_index = ret; spin_unlock_bh(&sess_idr_lock); + idr_preload_end(); if (ret < 0) { - pr_err("idr_get_new() for sess_idr failed\n"); + pr_err("idr_alloc() for sess_idr failed\n"); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); kfree(sess); diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 6659dd3..113f335 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -169,7 +169,6 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id) { struct ft_tport *tport; struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; rcu_read_lock(); @@ -178,7 +177,7 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id) goto out; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { if (sess->port_id == port_id) { kref_get(&sess->kref); rcu_read_unlock(); @@ -201,10 +200,9 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id, { struct ft_sess *sess; struct hlist_head *head; - struct hlist_node *pos; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) + hlist_for_each_entry_rcu(sess, head, hash) if (sess->port_id == port_id) return sess; @@ -253,11 +251,10 @@ static void ft_sess_unhash(struct ft_sess *sess) static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id) { struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { if (sess->port_id == port_id) { ft_sess_unhash(sess); return sess; @@ -273,12 +270,11 @@ static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id) static void ft_sess_delete_all(struct ft_tport *tport) { struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; for (head = tport->hash; head < &tport->hash[FT_SESS_HASH_SIZE]; head++) { - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { ft_sess_unhash(sess); transport_deregister_session_configfs(sess->se_sess); ft_sess_put(sess); /* release from table */ diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index c2c77d1..a764f16 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -29,14 +29,14 @@ choice config THERMAL_DEFAULT_GOV_STEP_WISE bool "step_wise" - select STEP_WISE + select THERMAL_GOV_STEP_WISE help Use the step_wise governor as default. This throttles the devices one step at a time. config THERMAL_DEFAULT_GOV_FAIR_SHARE bool "fair_share" - select FAIR_SHARE + select THERMAL_GOV_FAIR_SHARE help Use the fair_share governor as default. This throttles the devices based on their 'contribution' to a zone. The @@ -44,24 +44,24 @@ config THERMAL_DEFAULT_GOV_FAIR_SHARE config THERMAL_DEFAULT_GOV_USER_SPACE bool "user_space" - select USER_SPACE + select THERMAL_GOV_USER_SPACE help Select this if you want to let the user space manage the lpatform thermals. endchoice -config FAIR_SHARE +config THERMAL_GOV_FAIR_SHARE bool "Fair-share thermal governor" help Enable this to manage platform thermals using fair-share governor. -config STEP_WISE +config THERMAL_GOV_STEP_WISE bool "Step_wise thermal governor" help Enable this to manage platform thermals using a simple linear -config USER_SPACE +config THERMAL_GOV_USER_SPACE bool "User_space thermal governor" help Enable this to let the user space manage the platform thermals. @@ -78,6 +78,14 @@ config CPU_THERMAL and not the ACPI interface. If you want this support, you should say Y here. +config THERMAL_EMULATION + bool "Thermal emulation mode support" + help + Enable this option to make a emul_temp sysfs node in thermal zone + directory to support temperature emulation. With emulation sysfs node, + user can manually input temperature and test the different trip + threshold behaviour for simulation purpose. + config SPEAR_THERMAL bool "SPEAr thermal sensor driver" depends on PLAT_SPEAR @@ -93,6 +101,14 @@ config RCAR_THERMAL Enable this to plug the R-Car thermal sensor driver into the Linux thermal framework +config KIRKWOOD_THERMAL + tristate "Temperature sensor on Marvell Kirkwood SoCs" + depends on ARCH_KIRKWOOD + depends on OF + help + Support for the Kirkwood thermal sensor driver into the Linux thermal + framework. Only kirkwood 88F6282 and 88F6283 have this sensor. + config EXYNOS_THERMAL tristate "Temperature sensor on Samsung EXYNOS" depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5) @@ -101,6 +117,23 @@ config EXYNOS_THERMAL If you say yes here you get support for TMU (Thermal Management Unit) on SAMSUNG EXYNOS series of SoC. +config EXYNOS_THERMAL_EMUL + bool "EXYNOS TMU emulation mode support" + depends on EXYNOS_THERMAL + help + Exynos 4412 and 4414 and 5 series has emulation mode on TMU. + Enable this option will be make sysfs node in exynos thermal platform + device directory to support emulation mode. With emulation mode sysfs + node, you can manually input temperature to TMU for simulation purpose. + +config DOVE_THERMAL + tristate "Temperature sensor on Marvell Dove SoCs" + depends on ARCH_DOVE + depends on OF + help + Support for the Dove thermal sensor driver in the Linux thermal + framework. + config DB8500_THERMAL bool "DB8500 thermal management" depends on ARCH_U8500 @@ -122,4 +155,14 @@ config DB8500_CPUFREQ_COOLING bound cpufreq cooling device turns active to set CPU frequency low to cool down the CPU. +config INTEL_POWERCLAMP + tristate "Intel PowerClamp idle injection driver" + depends on THERMAL + depends on X86 + depends on CPU_SUP_INTEL + help + Enable this to enable Intel PowerClamp idle injection driver. This + enforce idle time which results in more package C-state residency. The + user interface is exposed via generic thermal framework. + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index d8da683..d3a2b38 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -5,9 +5,9 @@ obj-$(CONFIG_THERMAL) += thermal_sys.o # governors -obj-$(CONFIG_FAIR_SHARE) += fair_share.o -obj-$(CONFIG_STEP_WISE) += step_wise.o -obj-$(CONFIG_USER_SPACE) += user_space.o +obj-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o +obj-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o +obj-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o # cpufreq cooling obj-$(CONFIG_CPU_THERMAL) += cpu_cooling.o @@ -15,6 +15,10 @@ obj-$(CONFIG_CPU_THERMAL) += cpu_cooling.o # platform thermal drivers obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o +obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o +obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o +obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o + diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 836828e..8dc44cb 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -73,21 +73,14 @@ static struct cpufreq_cooling_device *notify_device; */ static int get_idr(struct idr *idr, int *id) { - int err; -again: - if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0)) - return -ENOMEM; + int ret; mutex_lock(&cooling_cpufreq_lock); - err = idr_get_new(idr, NULL, id); + ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); mutex_unlock(&cooling_cpufreq_lock); - - if (unlikely(err == -EAGAIN)) - goto again; - else if (unlikely(err)) - return err; - - *id = *id & MAX_IDR_MASK; + if (unlikely(ret < 0)) + return ret; + *id = ret; return 0; } @@ -118,8 +111,8 @@ static int is_cpufreq_valid(int cpu) /** * get_cpu_frequency - get the absolute value of frequency from level. * @cpu: cpu for which frequency is fetched. - * @level: level of frequency of the CPU - * e.g level=1 --> 1st MAX FREQ, LEVEL=2 ---> 2nd MAX FREQ, .... etc + * @level: level of frequency, equals cooling state of cpu cooling device + * e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc */ static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level) { diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c index 4cf8e72..2141985 100644 --- a/drivers/thermal/db8500_cpufreq_cooling.c +++ b/drivers/thermal/db8500_cpufreq_cooling.c @@ -21,6 +21,7 @@ #include <linux/cpufreq.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/slab.h> @@ -73,15 +74,13 @@ static const struct of_device_id db8500_cpufreq_cooling_match[] = { { .compatible = "stericsson,db8500-cpufreq-cooling" }, {}, }; -#else -#define db8500_cpufreq_cooling_match NULL #endif static struct platform_driver db8500_cpufreq_cooling_driver = { .driver = { .owner = THIS_MODULE, .name = "db8500-cpufreq-cooling", - .of_match_table = db8500_cpufreq_cooling_match, + .of_match_table = of_match_ptr(db8500_cpufreq_cooling_match), }, .probe = db8500_cpufreq_cooling_probe, .suspend = db8500_cpufreq_cooling_suspend, diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index ec71ade..61ce60a 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -508,15 +508,13 @@ static const struct of_device_id db8500_thermal_match[] = { { .compatible = "stericsson,db8500-thermal" }, {}, }; -#else -#define db8500_thermal_match NULL #endif static struct platform_driver db8500_thermal_driver = { .driver = { .owner = THIS_MODULE, .name = "db8500-thermal", - .of_match_table = db8500_thermal_match, + .of_match_table = of_match_ptr(db8500_thermal_match), }, .probe = db8500_thermal_probe, .suspend = db8500_thermal_suspend, diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c new file mode 100644 index 0000000..7b0bfa0 --- /dev/null +++ b/drivers/thermal/dove_thermal.c @@ -0,0 +1,209 @@ +/* + * Dove thermal sensor driver + * + * Copyright (C) 2013 Andrew Lunn <andrew@lunn.ch> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/thermal.h> + +#define DOVE_THERMAL_TEMP_OFFSET 1 +#define DOVE_THERMAL_TEMP_MASK 0x1FF + +/* Dove Thermal Manager Control and Status Register */ +#define PMU_TM_DISABLE_OFFS 0 +#define PMU_TM_DISABLE_MASK (0x1 << PMU_TM_DISABLE_OFFS) + +/* Dove Theraml Diode Control 0 Register */ +#define PMU_TDC0_SW_RST_MASK (0x1 << 1) +#define PMU_TDC0_SEL_VCAL_OFFS 5 +#define PMU_TDC0_SEL_VCAL_MASK (0x3 << PMU_TDC0_SEL_VCAL_OFFS) +#define PMU_TDC0_REF_CAL_CNT_OFFS 11 +#define PMU_TDC0_REF_CAL_CNT_MASK (0x1FF << PMU_TDC0_REF_CAL_CNT_OFFS) +#define PMU_TDC0_AVG_NUM_OFFS 25 +#define PMU_TDC0_AVG_NUM_MASK (0x7 << PMU_TDC0_AVG_NUM_OFFS) + +/* Dove Thermal Diode Control 1 Register */ +#define PMU_TEMP_DIOD_CTRL1_REG 0x04 +#define PMU_TDC1_TEMP_VALID_MASK (0x1 << 10) + +/* Dove Thermal Sensor Dev Structure */ +struct dove_thermal_priv { + void __iomem *sensor; + void __iomem *control; +}; + +static int dove_init_sensor(const struct dove_thermal_priv *priv) +{ + u32 reg; + u32 i; + + /* Configure the Diode Control Register #0 */ + reg = readl_relaxed(priv->control); + + /* Use average of 2 */ + reg &= ~PMU_TDC0_AVG_NUM_MASK; + reg |= (0x1 << PMU_TDC0_AVG_NUM_OFFS); + + /* Reference calibration value */ + reg &= ~PMU_TDC0_REF_CAL_CNT_MASK; + reg |= (0x0F1 << PMU_TDC0_REF_CAL_CNT_OFFS); + + /* Set the high level reference for calibration */ + reg &= ~PMU_TDC0_SEL_VCAL_MASK; + reg |= (0x2 << PMU_TDC0_SEL_VCAL_OFFS); + writel(reg, priv->control); + + /* Reset the sensor */ + reg = readl_relaxed(priv->control); + writel((reg | PMU_TDC0_SW_RST_MASK), priv->control); + writel(reg, priv->control); + + /* Enable the sensor */ + reg = readl_relaxed(priv->sensor); + reg &= ~PMU_TM_DISABLE_MASK; + writel(reg, priv->sensor); + + /* Poll the sensor for the first reading */ + for (i = 0; i < 1000000; i++) { + reg = readl_relaxed(priv->sensor); + if (reg & DOVE_THERMAL_TEMP_MASK) + break; + } + + if (i == 1000000) + return -EIO; + + return 0; +} + +static int dove_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + unsigned long reg; + struct dove_thermal_priv *priv = thermal->devdata; + + /* Valid check */ + reg = readl_relaxed(priv->control + PMU_TEMP_DIOD_CTRL1_REG); + if ((reg & PMU_TDC1_TEMP_VALID_MASK) == 0x0) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + /* + * Calculate temperature. See Section 8.10.1 of 88AP510, + * Documentation/arm/Marvell/README + */ + reg = readl_relaxed(priv->sensor); + reg = (reg >> DOVE_THERMAL_TEMP_OFFSET) & DOVE_THERMAL_TEMP_MASK; + *temp = ((2281638UL - (7298*reg)) / 10); + + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = dove_get_temp, +}; + +static const struct of_device_id dove_thermal_id_table[] = { + { .compatible = "marvell,dove-thermal" }, + {} +}; + +static int dove_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal = NULL; + struct dove_thermal_priv *priv; + struct resource *res; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->sensor = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->sensor) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + priv->control = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->control) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + ret = dove_init_sensor(priv); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize sensor\n"); + return ret; + } + + thermal = thermal_zone_device_register("dove_thermal", 0, 0, + priv, &ops, NULL, 0, 0); + if (IS_ERR(thermal)) { + dev_err(&pdev->dev, + "Failed to register thermal zone device\n"); + return PTR_ERR(thermal); + } + + platform_set_drvdata(pdev, thermal); + + return 0; +} + +static int dove_thermal_exit(struct platform_device *pdev) +{ + struct thermal_zone_device *dove_thermal = + platform_get_drvdata(pdev); + + thermal_zone_device_unregister(dove_thermal); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +MODULE_DEVICE_TABLE(of, dove_thermal_id_table); + +static struct platform_driver dove_thermal_driver = { + .probe = dove_thermal_probe, + .remove = dove_thermal_exit, + .driver = { + .name = "dove_thermal", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(dove_thermal_id_table), + }, +}; + +module_platform_driver(dove_thermal_driver); + +MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>"); +MODULE_DESCRIPTION("Dove thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index bada130..e04ebd8 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -82,7 +82,7 @@ #define EXYNOS_TRIMINFO_RELOAD 0x1 #define EXYNOS_TMU_CLEAR_RISE_INT 0x111 -#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 16) +#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 12) #define EXYNOS_MUX_ADDR_VALUE 6 #define EXYNOS_MUX_ADDR_SHIFT 20 #define EXYNOS_TMU_TRIP_MODE_SHIFT 13 @@ -94,11 +94,20 @@ #define SENSOR_NAME_LEN 16 #define MAX_TRIP_COUNT 8 #define MAX_COOLING_DEVICE 4 +#define MAX_THRESHOLD_LEVS 4 #define ACTIVE_INTERVAL 500 #define IDLE_INTERVAL 10000 #define MCELSIUS 1000 +#ifdef CONFIG_EXYNOS_THERMAL_EMUL +#define EXYNOS_EMUL_TIME 0x57F0 +#define EXYNOS_EMUL_TIME_SHIFT 16 +#define EXYNOS_EMUL_DATA_SHIFT 8 +#define EXYNOS_EMUL_DATA_MASK 0xFF +#define EXYNOS_EMUL_ENABLE 0x1 +#endif /* CONFIG_EXYNOS_THERMAL_EMUL */ + /* CPU Zone information */ #define PANIC_ZONE 4 #define WARN_ZONE 3 @@ -125,6 +134,7 @@ struct exynos_tmu_data { struct thermal_trip_point_conf { int trip_val[MAX_TRIP_COUNT]; int trip_count; + u8 trigger_falling; }; struct thermal_cooling_conf { @@ -174,7 +184,8 @@ static int exynos_set_mode(struct thermal_zone_device *thermal, mutex_lock(&th_zone->therm_dev->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) th_zone->therm_dev->polling_delay = IDLE_INTERVAL; else th_zone->therm_dev->polling_delay = 0; @@ -284,7 +295,7 @@ static int exynos_bind(struct thermal_zone_device *thermal, case MONITOR_ZONE: case WARN_ZONE: if (thermal_zone_bind_cooling_device(thermal, i, cdev, - level, level)) { + level, 0)) { pr_err("error binding cdev inst %d\n", i); ret = -EINVAL; } @@ -362,10 +373,17 @@ static int exynos_get_temp(struct thermal_zone_device *thermal, static int exynos_get_trend(struct thermal_zone_device *thermal, int trip, enum thermal_trend *trend) { - if (thermal->temperature >= trip) - *trend = THERMAL_TREND_RAISING; + int ret; + unsigned long trip_temp; + + ret = exynos_get_trip_temp(thermal, trip, &trip_temp); + if (ret < 0) + return ret; + + if (thermal->temperature >= trip_temp) + *trend = THERMAL_TREND_RAISE_FULL; else - *trend = THERMAL_TREND_DROPPING; + *trend = THERMAL_TREND_DROP_FULL; return 0; } @@ -413,7 +431,8 @@ static void exynos_report_trigger(void) break; } - if (th_zone->mode == THERMAL_DEVICE_ENABLED) { + if (th_zone->mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) { if (i > 0) th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL; else @@ -452,7 +471,8 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name, EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0, - IDLE_INTERVAL); + sensor_conf->trip_data.trigger_falling ? + 0 : IDLE_INTERVAL); if (IS_ERR(th_zone->therm_dev)) { pr_err("Failed to register thermal zone device\n"); @@ -559,8 +579,9 @@ static int exynos_tmu_initialize(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - unsigned int status, trim_info, rising_threshold; - int ret = 0, threshold_code; + unsigned int status, trim_info; + unsigned int rising_threshold = 0, falling_threshold = 0; + int ret = 0, threshold_code, i, trigger_levs = 0; mutex_lock(&data->lock); clk_enable(data->clk); @@ -585,6 +606,11 @@ static int exynos_tmu_initialize(struct platform_device *pdev) (data->temp_error2 != 0)) data->temp_error1 = pdata->efuse_value; + /* Count trigger levels to be enabled */ + for (i = 0; i < MAX_THRESHOLD_LEVS; i++) + if (pdata->trigger_levels[i]) + trigger_levs++; + if (data->soc == SOC_ARCH_EXYNOS4210) { /* Write temperature code for threshold */ threshold_code = temp_to_code(data, pdata->threshold); @@ -594,44 +620,38 @@ static int exynos_tmu_initialize(struct platform_device *pdev) } writeb(threshold_code, data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP); - - writeb(pdata->trigger_levels[0], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0); - writeb(pdata->trigger_levels[1], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL1); - writeb(pdata->trigger_levels[2], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL2); - writeb(pdata->trigger_levels[3], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL3); + for (i = 0; i < trigger_levs; i++) + writeb(pdata->trigger_levels[i], + data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4); writel(EXYNOS4210_TMU_INTCLEAR_VAL, data->base + EXYNOS_TMU_REG_INTCLEAR); } else if (data->soc == SOC_ARCH_EXYNOS) { - /* Write temperature code for threshold */ - threshold_code = temp_to_code(data, pdata->trigger_levels[0]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold = threshold_code; - threshold_code = temp_to_code(data, pdata->trigger_levels[1]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold |= (threshold_code << 8); - threshold_code = temp_to_code(data, pdata->trigger_levels[2]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; + /* Write temperature code for rising and falling threshold */ + for (i = 0; i < trigger_levs; i++) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i]); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + rising_threshold |= threshold_code << 8 * i; + if (pdata->threshold_falling) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i] - + pdata->threshold_falling); + if (threshold_code > 0) + falling_threshold |= + threshold_code << 8 * i; + } } - rising_threshold |= (threshold_code << 16); writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE); - writel(0, data->base + EXYNOS_THD_TEMP_FALL); + writel(falling_threshold, + data->base + EXYNOS_THD_TEMP_FALL); - writel(EXYNOS_TMU_CLEAR_RISE_INT|EXYNOS_TMU_CLEAR_FALL_INT, + writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT, data->base + EXYNOS_TMU_REG_INTCLEAR); } out: @@ -664,6 +684,8 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) pdata->trigger_level2_en << 8 | pdata->trigger_level1_en << 4 | pdata->trigger_level0_en; + if (pdata->threshold_falling) + interrupt_en |= interrupt_en << 16; } else { con |= EXYNOS_TMU_CORE_OFF; interrupt_en = 0; /* Disable all interrupts */ @@ -697,20 +719,19 @@ static void exynos_tmu_work(struct work_struct *work) struct exynos_tmu_data *data = container_of(work, struct exynos_tmu_data, irq_work); + exynos_report_trigger(); mutex_lock(&data->lock); clk_enable(data->clk); - - if (data->soc == SOC_ARCH_EXYNOS) - writel(EXYNOS_TMU_CLEAR_RISE_INT, + writel(EXYNOS_TMU_CLEAR_RISE_INT | + EXYNOS_TMU_CLEAR_FALL_INT, data->base + EXYNOS_TMU_REG_INTCLEAR); else writel(EXYNOS4210_TMU_INTCLEAR_VAL, data->base + EXYNOS_TMU_REG_INTCLEAR); - clk_disable(data->clk); mutex_unlock(&data->lock); - exynos_report_trigger(); + enable_irq(data->irq); } @@ -759,6 +780,7 @@ static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = { #if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) static struct exynos_tmu_platform_data const exynos_default_tmu_data = { + .threshold_falling = 10, .trigger_levels[0] = 85, .trigger_levels[1] = 103, .trigger_levels[2] = 110, @@ -800,8 +822,6 @@ static const struct of_device_id exynos_tmu_match[] = { {}, }; MODULE_DEVICE_TABLE(of, exynos_tmu_match); -#else -#define exynos_tmu_match NULL #endif static struct platform_device_id exynos_tmu_driver_ids[] = { @@ -832,6 +852,94 @@ static inline struct exynos_tmu_platform_data *exynos_get_driver_data( return (struct exynos_tmu_platform_data *) platform_get_device_id(pdev)->driver_data; } + +#ifdef CONFIG_EXYNOS_THERMAL_EMUL +static ssize_t exynos_tmu_emulation_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + unsigned int reg; + u8 temp_code; + int temp = 0; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + reg = readl(data->base + EXYNOS_EMUL_CON); + clk_disable(data->clk); + mutex_unlock(&data->lock); + + if (reg & EXYNOS_EMUL_ENABLE) { + reg >>= EXYNOS_EMUL_DATA_SHIFT; + temp_code = reg & EXYNOS_EMUL_DATA_MASK; + temp = code_to_temp(data, temp_code); + } +out: + return sprintf(buf, "%d\n", temp * MCELSIUS); +} + +static ssize_t exynos_tmu_emulation_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + unsigned int reg; + int temp; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + if (!sscanf(buf, "%d\n", &temp) || temp < 0) + return -EINVAL; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + reg = readl(data->base + EXYNOS_EMUL_CON); + + if (temp) { + /* Both CELSIUS and MCELSIUS type are available for input */ + if (temp > MCELSIUS) + temp /= MCELSIUS; + + reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) | + (temp_to_code(data, (temp / MCELSIUS)) + << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; + } else { + reg &= ~EXYNOS_EMUL_ENABLE; + } + + writel(reg, data->base + EXYNOS_EMUL_CON); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + +out: + return count; +} + +static DEVICE_ATTR(emulation, 0644, exynos_tmu_emulation_show, + exynos_tmu_emulation_store); +static int create_emulation_sysfs(struct device *dev) +{ + return device_create_file(dev, &dev_attr_emulation); +} +static void remove_emulation_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_emulation); +} +#else +static inline int create_emulation_sysfs(struct device *dev) { return 0; } +static inline void remove_emulation_sysfs(struct device *dev) {} +#endif + static int exynos_tmu_probe(struct platform_device *pdev) { struct exynos_tmu_data *data; @@ -914,6 +1022,8 @@ static int exynos_tmu_probe(struct platform_device *pdev) exynos_sensor_conf.trip_data.trip_val[i] = pdata->threshold + pdata->trigger_levels[i]; + exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling; + exynos_sensor_conf.cooling_data.freq_clip_count = pdata->freq_tab_count; for (i = 0; i < pdata->freq_tab_count; i++) { @@ -928,6 +1038,11 @@ static int exynos_tmu_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Failed to register thermal interface\n"); goto err_clk; } + + ret = create_emulation_sysfs(&pdev->dev); + if (ret) + dev_err(&pdev->dev, "Failed to create emulation mode sysfs node\n"); + return 0; err_clk: platform_set_drvdata(pdev, NULL); @@ -939,6 +1054,8 @@ static int exynos_tmu_remove(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); + remove_emulation_sysfs(&pdev->dev); + exynos_tmu_control(pdev, false); exynos_unregister_thermal(); @@ -980,7 +1097,7 @@ static struct platform_driver exynos_tmu_driver = { .name = "exynos-tmu", .owner = THIS_MODULE, .pm = EXYNOS_TMU_PM, - .of_match_table = exynos_tmu_match, + .of_match_table = of_match_ptr(exynos_tmu_match), }, .probe = exynos_tmu_probe, .remove = exynos_tmu_remove, diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c new file mode 100644 index 0000000..b40b37c --- /dev/null +++ b/drivers/thermal/intel_powerclamp.c @@ -0,0 +1,795 @@ +/* + * intel_powerclamp.c - package c-state idle injection + * + * Copyright (c) 2012, Intel Corporation. + * + * Authors: + * Arjan van de Ven <arjan@linux.intel.com> + * Jacob Pan <jacob.jun.pan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * + * TODO: + * 1. better handle wakeup from external interrupts, currently a fixed + * compensation is added to clamping duration when excessive amount + * of wakeups are observed during idle time. the reason is that in + * case of external interrupts without need for ack, clamping down + * cpu in non-irq context does not reduce irq. for majority of the + * cases, clamping down cpu does help reduce irq as well, we should + * be able to differenciate the two cases and give a quantitative + * solution for the irqs that we can control. perhaps based on + * get_cpu_iowait_time_us() + * + * 2. synchronization with other hw blocks + * + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/cpu.h> +#include <linux/thermal.h> +#include <linux/slab.h> +#include <linux/tick.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/sched/rt.h> + +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/mwait.h> +#include <asm/cpu_device_id.h> +#include <asm/idle.h> +#include <asm/hardirq.h> + +#define MAX_TARGET_RATIO (50U) +/* For each undisturbed clamping period (no extra wake ups during idle time), + * we increment the confidence counter for the given target ratio. + * CONFIDENCE_OK defines the level where runtime calibration results are + * valid. + */ +#define CONFIDENCE_OK (3) +/* Default idle injection duration, driver adjust sleep time to meet target + * idle ratio. Similar to frequency modulation. + */ +#define DEFAULT_DURATION_JIFFIES (6) + +static unsigned int target_mwait; +static struct dentry *debug_dir; + +/* user selected target */ +static unsigned int set_target_ratio; +static unsigned int current_ratio; +static bool should_skip; +static bool reduce_irq; +static atomic_t idle_wakeup_counter; +static unsigned int control_cpu; /* The cpu assigned to collect stat and update + * control parameters. default to BSP but BSP + * can be offlined. + */ +static bool clamping; + + +static struct task_struct * __percpu *powerclamp_thread; +static struct thermal_cooling_device *cooling_dev; +static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu + * clamping thread + */ + +static unsigned int duration; +static unsigned int pkg_cstate_ratio_cur; +static unsigned int window_size; + +static int duration_set(const char *arg, const struct kernel_param *kp) +{ + int ret = 0; + unsigned long new_duration; + + ret = kstrtoul(arg, 10, &new_duration); + if (ret) + goto exit; + if (new_duration > 25 || new_duration < 6) { + pr_err("Out of recommended range %lu, between 6-25ms\n", + new_duration); + ret = -EINVAL; + } + + duration = clamp(new_duration, 6ul, 25ul); + smp_mb(); + +exit: + + return ret; +} + +static struct kernel_param_ops duration_ops = { + .set = duration_set, + .get = param_get_int, +}; + + +module_param_cb(duration, &duration_ops, &duration, 0644); +MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); + +struct powerclamp_calibration_data { + unsigned long confidence; /* used for calibration, basically a counter + * gets incremented each time a clamping + * period is completed without extra wakeups + * once that counter is reached given level, + * compensation is deemed usable. + */ + unsigned long steady_comp; /* steady state compensation used when + * no extra wakeups occurred. + */ + unsigned long dynamic_comp; /* compensate excessive wakeup from idle + * mostly from external interrupts. + */ +}; + +static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO]; + +static int window_size_set(const char *arg, const struct kernel_param *kp) +{ + int ret = 0; + unsigned long new_window_size; + + ret = kstrtoul(arg, 10, &new_window_size); + if (ret) + goto exit_win; + if (new_window_size > 10 || new_window_size < 2) { + pr_err("Out of recommended window size %lu, between 2-10\n", + new_window_size); + ret = -EINVAL; + } + + window_size = clamp(new_window_size, 2ul, 10ul); + smp_mb(); + +exit_win: + + return ret; +} + +static struct kernel_param_ops window_size_ops = { + .set = window_size_set, + .get = param_get_int, +}; + +module_param_cb(window_size, &window_size_ops, &window_size, 0644); +MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n" + "\tpowerclamp controls idle ratio within this window. larger\n" + "\twindow size results in slower response time but more smooth\n" + "\tclamping results. default to 2."); + +static void find_target_mwait(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) + return; + + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + +} + +static u64 pkg_state_counter(void) +{ + u64 val; + u64 count = 0; + + static bool skip_c2; + static bool skip_c3; + static bool skip_c6; + static bool skip_c7; + + if (!skip_c2) { + if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val)) + count += val; + else + skip_c2 = true; + } + + if (!skip_c3) { + if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val)) + count += val; + else + skip_c3 = true; + } + + if (!skip_c6) { + if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val)) + count += val; + else + skip_c6 = true; + } + + if (!skip_c7) { + if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val)) + count += val; + else + skip_c7 = true; + } + + return count; +} + +static void noop_timer(unsigned long foo) +{ + /* empty... just the fact that we get the interrupt wakes us up */ +} + +static unsigned int get_compensation(int ratio) +{ + unsigned int comp = 0; + + /* we only use compensation if all adjacent ones are good */ + if (ratio == 1 && + cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio + 1].confidence >= CONFIDENCE_OK && + cal_data[ratio + 2].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio + 1].steady_comp + + cal_data[ratio + 2].steady_comp) / 3; + } else if (ratio == MAX_TARGET_RATIO - 1 && + cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio - 1].confidence >= CONFIDENCE_OK && + cal_data[ratio - 2].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio - 1].steady_comp + + cal_data[ratio - 2].steady_comp) / 3; + } else if (cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio - 1].confidence >= CONFIDENCE_OK && + cal_data[ratio + 1].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio - 1].steady_comp + + cal_data[ratio + 1].steady_comp) / 3; + } + + /* REVISIT: simple penalty of double idle injection */ + if (reduce_irq) + comp = ratio; + /* do not exceed limit */ + if (comp + ratio >= MAX_TARGET_RATIO) + comp = MAX_TARGET_RATIO - ratio - 1; + + return comp; +} + +static void adjust_compensation(int target_ratio, unsigned int win) +{ + int delta; + struct powerclamp_calibration_data *d = &cal_data[target_ratio]; + + /* + * adjust compensations if confidence level has not been reached or + * there are too many wakeups during the last idle injection period, we + * cannot trust the data for compensation. + */ + if (d->confidence >= CONFIDENCE_OK || + atomic_read(&idle_wakeup_counter) > + win * num_online_cpus()) + return; + + delta = set_target_ratio - current_ratio; + /* filter out bad data */ + if (delta >= 0 && delta <= (1+target_ratio/10)) { + if (d->steady_comp) + d->steady_comp = + roundup(delta+d->steady_comp, 2)/2; + else + d->steady_comp = delta; + d->confidence++; + } +} + +static bool powerclamp_adjust_controls(unsigned int target_ratio, + unsigned int guard, unsigned int win) +{ + static u64 msr_last, tsc_last; + u64 msr_now, tsc_now; + u64 val64; + + /* check result for the last window */ + msr_now = pkg_state_counter(); + rdtscll(tsc_now); + + /* calculate pkg cstate vs tsc ratio */ + if (!msr_last || !tsc_last) + current_ratio = 1; + else if (tsc_now-tsc_last) { + val64 = 100*(msr_now-msr_last); + do_div(val64, (tsc_now-tsc_last)); + current_ratio = val64; + } + + /* update record */ + msr_last = msr_now; + tsc_last = tsc_now; + + adjust_compensation(target_ratio, win); + /* + * too many external interrupts, set flag such + * that we can take measure later. + */ + reduce_irq = atomic_read(&idle_wakeup_counter) >= + 2 * win * num_online_cpus(); + + atomic_set(&idle_wakeup_counter, 0); + /* if we are above target+guard, skip */ + return set_target_ratio + guard <= current_ratio; +} + +static int clamp_thread(void *arg) +{ + int cpunr = (unsigned long)arg; + DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0); + static const struct sched_param param = { + .sched_priority = MAX_USER_RT_PRIO/2, + }; + unsigned int count = 0; + unsigned int target_ratio; + + set_bit(cpunr, cpu_clamping_mask); + set_freezable(); + init_timer_on_stack(&wakeup_timer); + sched_setscheduler(current, SCHED_FIFO, ¶m); + + while (true == clamping && !kthread_should_stop() && + cpu_online(cpunr)) { + int sleeptime; + unsigned long target_jiffies; + unsigned int guard; + unsigned int compensation = 0; + int interval; /* jiffies to sleep for each attempt */ + unsigned int duration_jiffies = msecs_to_jiffies(duration); + unsigned int window_size_now; + + try_to_freeze(); + /* + * make sure user selected ratio does not take effect until + * the next round. adjust target_ratio if user has changed + * target such that we can converge quickly. + */ + target_ratio = set_target_ratio; + guard = 1 + target_ratio/20; + window_size_now = window_size; + count++; + + /* + * systems may have different ability to enter package level + * c-states, thus we need to compensate the injected idle ratio + * to achieve the actual target reported by the HW. + */ + compensation = get_compensation(target_ratio); + interval = duration_jiffies*100/(target_ratio+compensation); + + /* align idle time */ + target_jiffies = roundup(jiffies, interval); + sleeptime = target_jiffies - jiffies; + if (sleeptime <= 0) + sleeptime = 1; + schedule_timeout_interruptible(sleeptime); + /* + * only elected controlling cpu can collect stats and update + * control parameters. + */ + if (cpunr == control_cpu && !(count%window_size_now)) { + should_skip = + powerclamp_adjust_controls(target_ratio, + guard, window_size_now); + smp_mb(); + } + + if (should_skip) + continue; + + target_jiffies = jiffies + duration_jiffies; + mod_timer(&wakeup_timer, target_jiffies); + if (unlikely(local_softirq_pending())) + continue; + /* + * stop tick sched during idle time, interrupts are still + * allowed. thus jiffies are updated properly. + */ + preempt_disable(); + tick_nohz_idle_enter(); + /* mwait until target jiffies is reached */ + while (time_before(jiffies, target_jiffies)) { + unsigned long ecx = 1; + unsigned long eax = target_mwait; + + /* + * REVISIT: may call enter_idle() to notify drivers who + * can save power during cpu idle. same for exit_idle() + */ + local_touch_nmi(); + stop_critical_timings(); + __monitor((void *)¤t_thread_info()->flags, 0, 0); + cpu_relax(); /* allow HT sibling to run */ + __mwait(eax, ecx); + start_critical_timings(); + atomic_inc(&idle_wakeup_counter); + } + tick_nohz_idle_exit(); + preempt_enable_no_resched(); + } + del_timer_sync(&wakeup_timer); + clear_bit(cpunr, cpu_clamping_mask); + + return 0; +} + +/* + * 1 HZ polling while clamping is active, useful for userspace + * to monitor actual idle ratio. + */ +static void poll_pkg_cstate(struct work_struct *dummy); +static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate); +static void poll_pkg_cstate(struct work_struct *dummy) +{ + static u64 msr_last; + static u64 tsc_last; + static unsigned long jiffies_last; + + u64 msr_now; + unsigned long jiffies_now; + u64 tsc_now; + u64 val64; + + msr_now = pkg_state_counter(); + rdtscll(tsc_now); + jiffies_now = jiffies; + + /* calculate pkg cstate vs tsc ratio */ + if (!msr_last || !tsc_last) + pkg_cstate_ratio_cur = 1; + else { + if (tsc_now - tsc_last) { + val64 = 100 * (msr_now - msr_last); + do_div(val64, (tsc_now - tsc_last)); + pkg_cstate_ratio_cur = val64; + } + } + + /* update record */ + msr_last = msr_now; + jiffies_last = jiffies_now; + tsc_last = tsc_now; + + if (true == clamping) + schedule_delayed_work(&poll_pkg_cstate_work, HZ); +} + +static int start_power_clamp(void) +{ + unsigned long cpu; + struct task_struct *thread; + + /* check if pkg cstate counter is completely 0, abort in this case */ + if (!pkg_state_counter()) { + pr_err("pkg cstate counter not functional, abort\n"); + return -EINVAL; + } + + set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); + /* prevent cpu hotplug */ + get_online_cpus(); + + /* prefer BSP */ + control_cpu = 0; + if (!cpu_online(control_cpu)) + control_cpu = smp_processor_id(); + + clamping = true; + schedule_delayed_work(&poll_pkg_cstate_work, 0); + + /* start one thread per online cpu */ + for_each_online_cpu(cpu) { + struct task_struct **p = + per_cpu_ptr(powerclamp_thread, cpu); + + thread = kthread_create_on_node(clamp_thread, + (void *) cpu, + cpu_to_node(cpu), + "kidle_inject/%ld", cpu); + /* bind to cpu here */ + if (likely(!IS_ERR(thread))) { + kthread_bind(thread, cpu); + wake_up_process(thread); + *p = thread; + } + + } + put_online_cpus(); + + return 0; +} + +static void end_power_clamp(void) +{ + int i; + struct task_struct *thread; + + clamping = false; + /* + * make clamping visible to other cpus and give per cpu clamping threads + * sometime to exit, or gets killed later. + */ + smp_mb(); + msleep(20); + if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) { + for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { + pr_debug("clamping thread for cpu %d alive, kill\n", i); + thread = *per_cpu_ptr(powerclamp_thread, i); + kthread_stop(thread); + } + } +} + +static int powerclamp_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + struct task_struct *thread; + struct task_struct **percpu_thread = + per_cpu_ptr(powerclamp_thread, cpu); + + if (false == clamping) + goto exit_ok; + + switch (action) { + case CPU_ONLINE: + thread = kthread_create_on_node(clamp_thread, + (void *) cpu, + cpu_to_node(cpu), + "kidle_inject/%lu", cpu); + if (likely(!IS_ERR(thread))) { + kthread_bind(thread, cpu); + wake_up_process(thread); + *percpu_thread = thread; + } + /* prefer BSP as controlling CPU */ + if (cpu == 0) { + control_cpu = 0; + smp_mb(); + } + break; + case CPU_DEAD: + if (test_bit(cpu, cpu_clamping_mask)) { + pr_err("cpu %lu dead but powerclamping thread is not\n", + cpu); + kthread_stop(*percpu_thread); + } + if (cpu == control_cpu) { + control_cpu = smp_processor_id(); + smp_mb(); + } + } + +exit_ok: + return NOTIFY_OK; +} + +static struct notifier_block powerclamp_cpu_notifier = { + .notifier_call = powerclamp_cpu_callback, +}; + +static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + *state = MAX_TARGET_RATIO; + + return 0; +} + +static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + if (true == clamping) + *state = pkg_cstate_ratio_cur; + else + /* to save power, do not poll idle ratio while not clamping */ + *state = -1; /* indicates invalid state */ + + return 0; +} + +static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long new_target_ratio) +{ + int ret = 0; + + new_target_ratio = clamp(new_target_ratio, 0UL, + (unsigned long) (MAX_TARGET_RATIO-1)); + if (set_target_ratio == 0 && new_target_ratio > 0) { + pr_info("Start idle injection to reduce power\n"); + set_target_ratio = new_target_ratio; + ret = start_power_clamp(); + goto exit_set; + } else if (set_target_ratio > 0 && new_target_ratio == 0) { + pr_info("Stop forced idle injection\n"); + set_target_ratio = 0; + end_power_clamp(); + } else /* adjust currently running */ { + set_target_ratio = new_target_ratio; + /* make new set_target_ratio visible to other cpus */ + smp_mb(); + } + +exit_set: + return ret; +} + +/* bind to generic thermal layer as cooling device*/ +static struct thermal_cooling_device_ops powerclamp_cooling_ops = { + .get_max_state = powerclamp_get_max_state, + .get_cur_state = powerclamp_get_cur_state, + .set_cur_state = powerclamp_set_cur_state, +}; + +/* runs on Nehalem and later */ +static const struct x86_cpu_id intel_powerclamp_ids[] = { + { X86_VENDOR_INTEL, 6, 0x1a}, + { X86_VENDOR_INTEL, 6, 0x1c}, + { X86_VENDOR_INTEL, 6, 0x1e}, + { X86_VENDOR_INTEL, 6, 0x1f}, + { X86_VENDOR_INTEL, 6, 0x25}, + { X86_VENDOR_INTEL, 6, 0x26}, + { X86_VENDOR_INTEL, 6, 0x2a}, + { X86_VENDOR_INTEL, 6, 0x2c}, + { X86_VENDOR_INTEL, 6, 0x2d}, + { X86_VENDOR_INTEL, 6, 0x2e}, + { X86_VENDOR_INTEL, 6, 0x2f}, + { X86_VENDOR_INTEL, 6, 0x3a}, + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); + +static int powerclamp_probe(void) +{ + if (!x86_match_cpu(intel_powerclamp_ids)) { + pr_err("Intel powerclamp does not run on family %d model %d\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); + return -ENODEV; + } + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || + !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) || + !boot_cpu_has(X86_FEATURE_MWAIT) || + !boot_cpu_has(X86_FEATURE_ARAT)) + return -ENODEV; + + /* find the deepest mwait value */ + find_target_mwait(); + + return 0; +} + +static int powerclamp_debug_show(struct seq_file *m, void *unused) +{ + int i = 0; + + seq_printf(m, "controlling cpu: %d\n", control_cpu); + seq_printf(m, "pct confidence steady dynamic (compensation)\n"); + for (i = 0; i < MAX_TARGET_RATIO; i++) { + seq_printf(m, "%d\t%lu\t%lu\t%lu\n", + i, + cal_data[i].confidence, + cal_data[i].steady_comp, + cal_data[i].dynamic_comp); + } + + return 0; +} + +static int powerclamp_debug_open(struct inode *inode, + struct file *file) +{ + return single_open(file, powerclamp_debug_show, inode->i_private); +} + +static const struct file_operations powerclamp_debug_fops = { + .open = powerclamp_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static inline void powerclamp_create_debug_files(void) +{ + debug_dir = debugfs_create_dir("intel_powerclamp", NULL); + if (!debug_dir) + return; + + if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, + cal_data, &powerclamp_debug_fops)) + goto file_error; + + return; + +file_error: + debugfs_remove_recursive(debug_dir); +} + +static int powerclamp_init(void) +{ + int retval; + int bitmap_size; + + bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long); + cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL); + if (!cpu_clamping_mask) + return -ENOMEM; + + /* probe cpu features and ids here */ + retval = powerclamp_probe(); + if (retval) + return retval; + /* set default limit, maybe adjusted during runtime based on feedback */ + window_size = 2; + register_hotcpu_notifier(&powerclamp_cpu_notifier); + powerclamp_thread = alloc_percpu(struct task_struct *); + cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, + &powerclamp_cooling_ops); + if (IS_ERR(cooling_dev)) + return -ENODEV; + + if (!duration) + duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES); + powerclamp_create_debug_files(); + + return 0; +} +module_init(powerclamp_init); + +static void powerclamp_exit(void) +{ + unregister_hotcpu_notifier(&powerclamp_cpu_notifier); + end_power_clamp(); + free_percpu(powerclamp_thread); + thermal_cooling_device_unregister(cooling_dev); + kfree(cpu_clamping_mask); + + cancel_delayed_work_sync(&poll_pkg_cstate_work); + debugfs_remove_recursive(debug_dir); +} +module_exit(powerclamp_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); +MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>"); +MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs"); diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c new file mode 100644 index 0000000..65cb4f0 --- /dev/null +++ b/drivers/thermal/kirkwood_thermal.c @@ -0,0 +1,134 @@ +/* + * Kirkwood thermal sensor driver + * + * Copyright (C) 2012 Nobuhiro Iwamatsu <iwamatsu@nigauri.org> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/thermal.h> + +#define KIRKWOOD_THERMAL_VALID_OFFSET 9 +#define KIRKWOOD_THERMAL_VALID_MASK 0x1 +#define KIRKWOOD_THERMAL_TEMP_OFFSET 10 +#define KIRKWOOD_THERMAL_TEMP_MASK 0x1FF + +/* Kirkwood Thermal Sensor Dev Structure */ +struct kirkwood_thermal_priv { + void __iomem *sensor; +}; + +static int kirkwood_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + unsigned long reg; + struct kirkwood_thermal_priv *priv = thermal->devdata; + + reg = readl_relaxed(priv->sensor); + + /* Valid check */ + if (!(reg >> KIRKWOOD_THERMAL_VALID_OFFSET) & + KIRKWOOD_THERMAL_VALID_MASK) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + /* + * Calculate temperature. See Section 8.10.1 of the 88AP510, + * datasheet, which has the same sensor. + * Documentation/arm/Marvell/README + */ + reg = (reg >> KIRKWOOD_THERMAL_TEMP_OFFSET) & + KIRKWOOD_THERMAL_TEMP_MASK; + *temp = ((2281638UL - (7298*reg)) / 10); + + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = kirkwood_get_temp, +}; + +static const struct of_device_id kirkwood_thermal_id_table[] = { + { .compatible = "marvell,kirkwood-thermal" }, + {} +}; + +static int kirkwood_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal = NULL; + struct kirkwood_thermal_priv *priv; + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->sensor = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->sensor) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0, + priv, &ops, NULL, 0, 0); + if (IS_ERR(thermal)) { + dev_err(&pdev->dev, + "Failed to register thermal zone device\n"); + return PTR_ERR(thermal); + } + + platform_set_drvdata(pdev, thermal); + + return 0; +} + +static int kirkwood_thermal_exit(struct platform_device *pdev) +{ + struct thermal_zone_device *kirkwood_thermal = + platform_get_drvdata(pdev); + + thermal_zone_device_unregister(kirkwood_thermal); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +MODULE_DEVICE_TABLE(of, kirkwood_thermal_id_table); + +static struct platform_driver kirkwood_thermal_driver = { + .probe = kirkwood_thermal_probe, + .remove = kirkwood_thermal_exit, + .driver = { + .name = "kirkwood_thermal", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(kirkwood_thermal_id_table), + }, +}; + +module_platform_driver(kirkwood_thermal_driver); + +MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu@nigauri.org>"); +MODULE_DESCRIPTION("kirkwood thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 90db951..28f0919 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -19,225 +19,473 @@ */ #include <linux/delay.h> #include <linux/err.h> +#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/io.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/thermal.h> -#define THSCR 0x2c -#define THSSR 0x30 +#define IDLE_INTERVAL 5000 + +#define COMMON_STR 0x00 +#define COMMON_ENR 0x04 +#define COMMON_INTMSK 0x0c + +#define REG_POSNEG 0x20 +#define REG_FILONOFF 0x28 +#define REG_THSCR 0x2c +#define REG_THSSR 0x30 +#define REG_INTCTRL 0x34 /* THSCR */ -#define CPTAP 0xf +#define CPCTL (1 << 12) /* THSSR */ #define CTEMP 0x3f - -struct rcar_thermal_priv { +struct rcar_thermal_common { void __iomem *base; struct device *dev; + struct list_head head; spinlock_t lock; - u32 comp; }; +struct rcar_thermal_priv { + void __iomem *base; + struct rcar_thermal_common *common; + struct thermal_zone_device *zone; + struct delayed_work work; + struct mutex lock; + struct list_head list; + int id; + int ctemp; +}; + +#define rcar_thermal_for_each_priv(pos, common) \ + list_for_each_entry(pos, &common->head, list) + #define MCELSIUS(temp) ((temp) * 1000) -#define rcar_zone_to_priv(zone) (zone->devdata) +#define rcar_zone_to_priv(zone) ((zone)->devdata) +#define rcar_priv_to_dev(priv) ((priv)->common->dev) +#define rcar_has_irq_support(priv) ((priv)->common->base) +#define rcar_id_to_shift(priv) ((priv)->id * 8) + +#ifdef DEBUG +# define rcar_force_update_temp(priv) 1 +#else +# define rcar_force_update_temp(priv) 0 +#endif /* * basic functions */ -static u32 rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg) +#define rcar_thermal_common_read(c, r) \ + _rcar_thermal_common_read(c, COMMON_ ##r) +static u32 _rcar_thermal_common_read(struct rcar_thermal_common *common, + u32 reg) { - unsigned long flags; - u32 ret; - - spin_lock_irqsave(&priv->lock, flags); + return ioread32(common->base + reg); +} - ret = ioread32(priv->base + reg); +#define rcar_thermal_common_write(c, r, d) \ + _rcar_thermal_common_write(c, COMMON_ ##r, d) +static void _rcar_thermal_common_write(struct rcar_thermal_common *common, + u32 reg, u32 data) +{ + iowrite32(data, common->base + reg); +} - spin_unlock_irqrestore(&priv->lock, flags); +#define rcar_thermal_common_bset(c, r, m, d) \ + _rcar_thermal_common_bset(c, COMMON_ ##r, m, d) +static void _rcar_thermal_common_bset(struct rcar_thermal_common *common, + u32 reg, u32 mask, u32 data) +{ + u32 val; - return ret; + val = ioread32(common->base + reg); + val &= ~mask; + val |= (data & mask); + iowrite32(val, common->base + reg); } -#if 0 /* no user at this point */ -static void rcar_thermal_write(struct rcar_thermal_priv *priv, - u32 reg, u32 data) +#define rcar_thermal_read(p, r) _rcar_thermal_read(p, REG_ ##r) +static u32 _rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg) { - unsigned long flags; - - spin_lock_irqsave(&priv->lock, flags); + return ioread32(priv->base + reg); +} +#define rcar_thermal_write(p, r, d) _rcar_thermal_write(p, REG_ ##r, d) +static void _rcar_thermal_write(struct rcar_thermal_priv *priv, + u32 reg, u32 data) +{ iowrite32(data, priv->base + reg); - - spin_unlock_irqrestore(&priv->lock, flags); } -#endif -static void rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg, - u32 mask, u32 data) +#define rcar_thermal_bset(p, r, m, d) _rcar_thermal_bset(p, REG_ ##r, m, d) +static void _rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg, + u32 mask, u32 data) { - unsigned long flags; u32 val; - spin_lock_irqsave(&priv->lock, flags); - val = ioread32(priv->base + reg); val &= ~mask; val |= (data & mask); iowrite32(val, priv->base + reg); - - spin_unlock_irqrestore(&priv->lock, flags); } /* * zone device functions */ -static int rcar_thermal_get_temp(struct thermal_zone_device *zone, - unsigned long *temp) +static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) { - struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); - int val, min, max, tmp; - - tmp = -200; /* default */ - while (1) { - if (priv->comp < 1 || priv->comp > 12) { - dev_err(priv->dev, - "THSSR invalid data (%d)\n", priv->comp); - priv->comp = 4; /* for next thermal */ - return -EINVAL; - } + struct device *dev = rcar_priv_to_dev(priv); + int i; + int ctemp, old, new; - /* - * THS comparator offset and the reference temperature - * - * Comparator | reference | Temperature field - * offset | temperature | measurement - * | (degrees C) | (degrees C) - * -------------+---------------+------------------- - * 1 | -45 | -45 to -30 - * 2 | -30 | -30 to -15 - * 3 | -15 | -15 to 0 - * 4 | 0 | 0 to +15 - * 5 | +15 | +15 to +30 - * 6 | +30 | +30 to +45 - * 7 | +45 | +45 to +60 - * 8 | +60 | +60 to +75 - * 9 | +75 | +75 to +90 - * 10 | +90 | +90 to +105 - * 11 | +105 | +105 to +120 - * 12 | +120 | +120 to +135 - */ + mutex_lock(&priv->lock); - /* calculate thermal limitation */ - min = (priv->comp * 15) - 60; - max = min + 15; + /* + * TSC decides a value of CPTAP automatically, + * and this is the conditions which validate interrupt. + */ + rcar_thermal_bset(priv, THSCR, CPCTL, CPCTL); + ctemp = 0; + old = ~0; + for (i = 0; i < 128; i++) { /* * we need to wait 300us after changing comparator offset * to get stable temperature. * see "Usage Notes" on datasheet */ - rcar_thermal_bset(priv, THSCR, CPTAP, priv->comp); udelay(300); - /* calculate current temperature */ - val = rcar_thermal_read(priv, THSSR) & CTEMP; - val = (val * 5) - 65; + new = rcar_thermal_read(priv, THSSR) & CTEMP; + if (new == old) { + ctemp = new; + break; + } + old = new; + } - dev_dbg(priv->dev, "comp/min/max/val = %d/%d/%d/%d\n", - priv->comp, min, max, val); + if (!ctemp) { + dev_err(dev, "thermal sensor was broken\n"); + return -EINVAL; + } - /* - * If val is same as min/max, then, - * it should try again on next comparator. - * But the val might be correct temperature. - * Keep it on "tmp" and compare with next val. - */ - if (tmp == val) - break; + /* + * enable IRQ + */ + if (rcar_has_irq_support(priv)) { + rcar_thermal_write(priv, FILONOFF, 0); - if (val <= min) { - tmp = min; - priv->comp--; /* try again */ - } else if (val >= max) { - tmp = max; - priv->comp++; /* try again */ - } else { - tmp = val; - break; - } + /* enable Rising/Falling edge interrupt */ + rcar_thermal_write(priv, POSNEG, 0x1); + rcar_thermal_write(priv, INTCTRL, (((ctemp - 0) << 8) | + ((ctemp - 1) << 0))); + } + + dev_dbg(dev, "thermal%d %d -> %d\n", priv->id, priv->ctemp, ctemp); + + priv->ctemp = ctemp; + + mutex_unlock(&priv->lock); + + return 0; +} + +static int rcar_thermal_get_temp(struct thermal_zone_device *zone, + unsigned long *temp) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + + if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv)) + rcar_thermal_update_temp(priv); + + mutex_lock(&priv->lock); + *temp = MCELSIUS((priv->ctemp * 5) - 65); + mutex_unlock(&priv->lock); + + return 0; +} + +static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone, + int trip, enum thermal_trip_type *type) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + /* see rcar_thermal_get_temp() */ + switch (trip) { + case 0: /* +90 <= temp */ + *type = THERMAL_TRIP_CRITICAL; + break; + default: + dev_err(dev, "rcar driver trip error\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone, + int trip, unsigned long *temp) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + /* see rcar_thermal_get_temp() */ + switch (trip) { + case 0: /* +90 <= temp */ + *temp = MCELSIUS(90); + break; + default: + dev_err(dev, "rcar driver trip error\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_thermal_notify(struct thermal_zone_device *zone, + int trip, enum thermal_trip_type type) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + switch (type) { + case THERMAL_TRIP_CRITICAL: + /* FIXME */ + dev_warn(dev, "Thermal reached to critical temperature\n"); + break; + default: + break; } - *temp = MCELSIUS(tmp); return 0; } static struct thermal_zone_device_ops rcar_thermal_zone_ops = { - .get_temp = rcar_thermal_get_temp, + .get_temp = rcar_thermal_get_temp, + .get_trip_type = rcar_thermal_get_trip_type, + .get_trip_temp = rcar_thermal_get_trip_temp, + .notify = rcar_thermal_notify, }; /* - * platform functions + * interrupt */ -static int rcar_thermal_probe(struct platform_device *pdev) +#define rcar_thermal_irq_enable(p) _rcar_thermal_irq_ctrl(p, 1) +#define rcar_thermal_irq_disable(p) _rcar_thermal_irq_ctrl(p, 0) +static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable) +{ + struct rcar_thermal_common *common = priv->common; + unsigned long flags; + u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */ + + spin_lock_irqsave(&common->lock, flags); + + rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask); + + spin_unlock_irqrestore(&common->lock, flags); +} + +static void rcar_thermal_work(struct work_struct *work) { - struct thermal_zone_device *zone; struct rcar_thermal_priv *priv; - struct resource *res; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Could not get platform resource\n"); - return -ENODEV; + priv = container_of(work, struct rcar_thermal_priv, work.work); + + rcar_thermal_update_temp(priv); + rcar_thermal_irq_enable(priv); + thermal_zone_device_update(priv->zone); +} + +static u32 rcar_thermal_had_changed(struct rcar_thermal_priv *priv, u32 status) +{ + struct device *dev = rcar_priv_to_dev(priv); + + status = (status >> rcar_id_to_shift(priv)) & 0x3; + + if (status & 0x3) { + dev_dbg(dev, "thermal%d %s%s\n", + priv->id, + (status & 0x2) ? "Rising " : "", + (status & 0x1) ? "Falling" : ""); } - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) { - dev_err(&pdev->dev, "Could not allocate priv\n"); - return -ENOMEM; + return status; +} + +static irqreturn_t rcar_thermal_irq(int irq, void *data) +{ + struct rcar_thermal_common *common = data; + struct rcar_thermal_priv *priv; + unsigned long flags; + u32 status, mask; + + spin_lock_irqsave(&common->lock, flags); + + mask = rcar_thermal_common_read(common, INTMSK); + status = rcar_thermal_common_read(common, STR); + rcar_thermal_common_write(common, STR, 0x000F0F0F & mask); + + spin_unlock_irqrestore(&common->lock, flags); + + status = status & ~mask; + + /* + * check the status + */ + rcar_thermal_for_each_priv(priv, common) { + if (rcar_thermal_had_changed(priv, status)) { + rcar_thermal_irq_disable(priv); + schedule_delayed_work(&priv->work, + msecs_to_jiffies(300)); + } } - priv->comp = 4; /* basic setup */ - priv->dev = &pdev->dev; - spin_lock_init(&priv->lock); - priv->base = devm_ioremap_nocache(&pdev->dev, - res->start, resource_size(res)); - if (!priv->base) { - dev_err(&pdev->dev, "Unable to ioremap thermal register\n"); + return IRQ_HANDLED; +} + +/* + * platform functions + */ +static int rcar_thermal_probe(struct platform_device *pdev) +{ + struct rcar_thermal_common *common; + struct rcar_thermal_priv *priv; + struct device *dev = &pdev->dev; + struct resource *res, *irq; + int mres = 0; + int i; + int idle = IDLE_INTERVAL; + + common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL); + if (!common) { + dev_err(dev, "Could not allocate common\n"); return -ENOMEM; } - zone = thermal_zone_device_register("rcar_thermal", 0, 0, priv, - &rcar_thermal_zone_ops, NULL, 0, 0); - if (IS_ERR(zone)) { - dev_err(&pdev->dev, "thermal zone device is NULL\n"); - return PTR_ERR(zone); + INIT_LIST_HEAD(&common->head); + spin_lock_init(&common->lock); + common->dev = dev; + + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (irq) { + int ret; + + /* + * platform has IRQ support. + * Then, drier use common register + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); + if (!res) { + dev_err(dev, "Could not get platform resource\n"); + return -ENODEV; + } + + ret = devm_request_irq(dev, irq->start, rcar_thermal_irq, 0, + dev_name(dev), common); + if (ret) { + dev_err(dev, "irq request failed\n "); + return ret; + } + + /* + * rcar_has_irq_support() will be enabled + */ + common->base = devm_request_and_ioremap(dev, res); + if (!common->base) { + dev_err(dev, "Unable to ioremap thermal register\n"); + return -ENOMEM; + } + + /* enable temperature comparation */ + rcar_thermal_common_write(common, ENR, 0x00030303); + + idle = 0; /* polling delaye is not needed */ } - platform_set_drvdata(pdev, zone); + for (i = 0;; i++) { + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); + if (!res) + break; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(dev, "Could not allocate priv\n"); + return -ENOMEM; + } + + priv->base = devm_request_and_ioremap(dev, res); + if (!priv->base) { + dev_err(dev, "Unable to ioremap priv register\n"); + return -ENOMEM; + } - dev_info(&pdev->dev, "proved\n"); + priv->common = common; + priv->id = i; + mutex_init(&priv->lock); + INIT_LIST_HEAD(&priv->list); + INIT_DELAYED_WORK(&priv->work, rcar_thermal_work); + rcar_thermal_update_temp(priv); + + priv->zone = thermal_zone_device_register("rcar_thermal", + 1, 0, priv, + &rcar_thermal_zone_ops, NULL, 0, + idle); + if (IS_ERR(priv->zone)) { + dev_err(dev, "can't register thermal zone\n"); + goto error_unregister; + } + + list_move_tail(&priv->list, &common->head); + + if (rcar_has_irq_support(priv)) + rcar_thermal_irq_enable(priv); + } + + platform_set_drvdata(pdev, common); + + dev_info(dev, "%d sensor proved\n", i); return 0; + +error_unregister: + rcar_thermal_for_each_priv(priv, common) + thermal_zone_device_unregister(priv->zone); + + return -ENODEV; } static int rcar_thermal_remove(struct platform_device *pdev) { - struct thermal_zone_device *zone = platform_get_drvdata(pdev); + struct rcar_thermal_common *common = platform_get_drvdata(pdev); + struct rcar_thermal_priv *priv; + + rcar_thermal_for_each_priv(priv, common) + thermal_zone_device_unregister(priv->zone); - thermal_zone_device_unregister(zone); platform_set_drvdata(pdev, NULL); return 0; } +static const struct of_device_id rcar_thermal_dt_ids[] = { + { .compatible = "renesas,rcar-thermal", }, + {}, +}; +MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids); + static struct platform_driver rcar_thermal_driver = { .driver = { .name = "rcar_thermal", + .of_match_table = rcar_thermal_dt_ids, }, .probe = rcar_thermal_probe, .remove = rcar_thermal_remove, diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index 6b2d8b2..3c5ee56 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -131,7 +131,7 @@ static int spear_thermal_probe(struct platform_device *pdev) return -ENOMEM; } - stdev->clk = clk_get(&pdev->dev, NULL); + stdev->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(stdev->clk)) { dev_err(&pdev->dev, "Can't get clock\n"); return PTR_ERR(stdev->clk); @@ -140,7 +140,7 @@ static int spear_thermal_probe(struct platform_device *pdev) ret = clk_enable(stdev->clk); if (ret) { dev_err(&pdev->dev, "Can't enable clock\n"); - goto put_clk; + return ret; } stdev->flags = val; @@ -163,8 +163,6 @@ static int spear_thermal_probe(struct platform_device *pdev) disable_clk: clk_disable(stdev->clk); -put_clk: - clk_put(stdev->clk); return ret; } @@ -183,7 +181,6 @@ static int spear_thermal_exit(struct platform_device *pdev) writel_relaxed(actual_mask & ~stdev->flags, stdev->thermal_base); clk_disable(stdev->clk); - clk_put(stdev->clk); return 0; } diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 0cd5e9f..407cde3 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -35,21 +35,54 @@ * state for this trip point * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling * state for this trip point + * c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit + * for this trip point + * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit + * for this trip point + * If the temperature is lower than a trip point, + * a. if the trend is THERMAL_TREND_RAISING, do nothing + * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling + * state for this trip point, if the cooling state already + * equals lower limit, deactivate the thermal instance + * c. if the trend is THERMAL_TREND_RAISE_FULL, do nothing + * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit, + * if the cooling state already equals lower limit, + * deactive the thermal instance */ static unsigned long get_target_state(struct thermal_instance *instance, - enum thermal_trend trend) + enum thermal_trend trend, bool throttle) { struct thermal_cooling_device *cdev = instance->cdev; unsigned long cur_state; cdev->ops->get_cur_state(cdev, &cur_state); - if (trend == THERMAL_TREND_RAISING) { - cur_state = cur_state < instance->upper ? - (cur_state + 1) : instance->upper; - } else if (trend == THERMAL_TREND_DROPPING) { - cur_state = cur_state > instance->lower ? - (cur_state - 1) : instance->lower; + switch (trend) { + case THERMAL_TREND_RAISING: + if (throttle) + cur_state = cur_state < instance->upper ? + (cur_state + 1) : instance->upper; + break; + case THERMAL_TREND_RAISE_FULL: + if (throttle) + cur_state = instance->upper; + break; + case THERMAL_TREND_DROPPING: + if (cur_state == instance->lower) { + if (!throttle) + cur_state = -1; + } else + cur_state -= 1; + break; + case THERMAL_TREND_DROP_FULL: + if (cur_state == instance->lower) { + if (!throttle) + cur_state = -1; + } else + cur_state = instance->lower; + break; + default: + break; } return cur_state; @@ -66,57 +99,14 @@ static void update_passive_instance(struct thermal_zone_device *tz, tz->passive += value; } -static void update_instance_for_throttle(struct thermal_zone_device *tz, - int trip, enum thermal_trip_type trip_type, - enum thermal_trend trend) -{ - struct thermal_instance *instance; - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; - - instance->target = get_target_state(instance, trend); - - /* Activate a passive thermal instance */ - if (instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip_type, 1); - - instance->cdev->updated = false; /* cdev needs update */ - } -} - -static void update_instance_for_dethrottle(struct thermal_zone_device *tz, - int trip, enum thermal_trip_type trip_type) -{ - struct thermal_instance *instance; - struct thermal_cooling_device *cdev; - unsigned long cur_state; - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip || - instance->target == THERMAL_NO_TARGET) - continue; - - cdev = instance->cdev; - cdev->ops->get_cur_state(cdev, &cur_state); - - instance->target = cur_state > instance->lower ? - (cur_state - 1) : THERMAL_NO_TARGET; - - /* Deactivate a passive thermal instance */ - if (instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip_type, -1); - - cdev->updated = false; /* cdev needs update */ - } -} - static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) { long trip_temp; enum thermal_trip_type trip_type; enum thermal_trend trend; + struct thermal_instance *instance; + bool throttle = false; + int old_target; if (trip == THERMAL_TRIPS_NONE) { trip_temp = tz->forced_passive; @@ -128,12 +118,30 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) trend = get_tz_trend(tz, trip); + if (tz->temperature >= trip_temp) + throttle = true; + mutex_lock(&tz->lock); - if (tz->temperature >= trip_temp) - update_instance_for_throttle(tz, trip, trip_type, trend); - else - update_instance_for_dethrottle(tz, trip, trip_type); + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (instance->trip != trip) + continue; + + old_target = instance->target; + instance->target = get_target_state(instance, trend, throttle); + + /* Activate a passive thermal instance */ + if (old_target == THERMAL_NO_TARGET && + instance->target != THERMAL_NO_TARGET) + update_passive_instance(tz, trip_type, 1); + /* Deactivate a passive thermal instance */ + else if (old_target != THERMAL_NO_TARGET && + instance->target == THERMAL_NO_TARGET) + update_passive_instance(tz, trip_type, -1); + + + instance->cdev->updated = false; /* cdev needs update */ + } mutex_unlock(&tz->lock); } diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 8c8ce80..5b7863a 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -32,7 +32,6 @@ #include <linux/kdev_t.h> #include <linux/idr.h> #include <linux/thermal.h> -#include <linux/spinlock.h> #include <linux/reboot.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -132,23 +131,16 @@ EXPORT_SYMBOL_GPL(thermal_unregister_governor); static int get_idr(struct idr *idr, struct mutex *lock, int *id) { - int err; - -again: - if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0)) - return -ENOMEM; + int ret; if (lock) mutex_lock(lock); - err = idr_get_new(idr, NULL, id); + ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); if (lock) mutex_unlock(lock); - if (unlikely(err == -EAGAIN)) - goto again; - else if (unlikely(err)) - return err; - - *id = *id & MAX_IDR_MASK; + if (unlikely(ret < 0)) + return ret; + *id = ret; return 0; } @@ -355,8 +347,9 @@ static void handle_critical_trips(struct thermal_zone_device *tz, tz->ops->notify(tz, trip, trip_type); if (trip_type == THERMAL_TRIP_CRITICAL) { - pr_emerg("Critical temperature reached(%d C),shutting down\n", - tz->temperature / 1000); + dev_emerg(&tz->device, + "critical temperature reached(%d C),shutting down\n", + tz->temperature / 1000); orderly_poweroff(true); } } @@ -378,23 +371,57 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) monitor_thermal_zone(tz); } +static int thermal_zone_get_temp(struct thermal_zone_device *tz, + unsigned long *temp) +{ + int ret = 0; +#ifdef CONFIG_THERMAL_EMULATION + int count; + unsigned long crit_temp = -1UL; + enum thermal_trip_type type; +#endif + + mutex_lock(&tz->lock); + + ret = tz->ops->get_temp(tz, temp); +#ifdef CONFIG_THERMAL_EMULATION + if (!tz->emul_temperature) + goto skip_emul; + + for (count = 0; count < tz->trips; count++) { + ret = tz->ops->get_trip_type(tz, count, &type); + if (!ret && type == THERMAL_TRIP_CRITICAL) { + ret = tz->ops->get_trip_temp(tz, count, &crit_temp); + break; + } + } + + if (ret) + goto skip_emul; + + if (*temp < crit_temp) + *temp = tz->emul_temperature; +skip_emul: +#endif + mutex_unlock(&tz->lock); + return ret; +} + static void update_temperature(struct thermal_zone_device *tz) { long temp; int ret; - mutex_lock(&tz->lock); - - ret = tz->ops->get_temp(tz, &temp); + ret = thermal_zone_get_temp(tz, &temp); if (ret) { - pr_warn("failed to read out thermal zone %d\n", tz->id); - goto exit; + dev_warn(&tz->device, "failed to read out thermal zone %d\n", + tz->id); + return; } + mutex_lock(&tz->lock); tz->last_temperature = tz->temperature; tz->temperature = temp; - -exit: mutex_unlock(&tz->lock); } @@ -437,10 +464,7 @@ temp_show(struct device *dev, struct device_attribute *attr, char *buf) long temperature; int ret; - if (!tz->ops->get_temp) - return -EPERM; - - ret = tz->ops->get_temp(tz, &temperature); + ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; @@ -700,6 +724,31 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf) return sprintf(buf, "%s\n", tz->governor->name); } +#ifdef CONFIG_THERMAL_EMULATION +static ssize_t +emul_temp_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct thermal_zone_device *tz = to_thermal_zone(dev); + int ret = 0; + unsigned long temperature; + + if (kstrtoul(buf, 10, &temperature)) + return -EINVAL; + + if (!tz->ops->set_emul_temp) { + mutex_lock(&tz->lock); + tz->emul_temperature = temperature; + mutex_unlock(&tz->lock); + } else { + ret = tz->ops->set_emul_temp(tz, temperature); + } + + return ret ? ret : count; +} +static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store); +#endif/*CONFIG_THERMAL_EMULATION*/ + static DEVICE_ATTR(type, 0444, type_show, NULL); static DEVICE_ATTR(temp, 0444, temp_show, NULL); static DEVICE_ATTR(mode, 0644, mode_show, mode_store); @@ -842,7 +891,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) temp_input); struct thermal_zone_device *tz = temp->tz; - ret = tz->ops->get_temp(tz, &temperature); + ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; @@ -1529,6 +1578,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (!ops || !ops->get_temp) return ERR_PTR(-EINVAL); + if (trips > 0 && !ops->get_trip_type) + return ERR_PTR(-EINVAL); + tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL); if (!tz) return ERR_PTR(-ENOMEM); @@ -1592,6 +1644,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, goto unregister; } +#ifdef CONFIG_THERMAL_EMULATION + result = device_create_file(&tz->device, &dev_attr_emul_temp); + if (result) + goto unregister; +#endif /* Create policy attribute */ result = device_create_file(&tz->device, &dev_attr_policy); if (result) @@ -1711,7 +1768,8 @@ static struct genl_multicast_group thermal_event_mcgrp = { .name = THERMAL_GENL_MCAST_GROUP_NAME, }; -int thermal_generate_netlink_event(u32 orig, enum events event) +int thermal_generate_netlink_event(struct thermal_zone_device *tz, + enum events event) { struct sk_buff *skb; struct nlattr *attr; @@ -1721,6 +1779,9 @@ int thermal_generate_netlink_event(u32 orig, enum events event) int result; static unsigned int thermal_event_seqnum; + if (!tz) + return -EINVAL; + /* allocate memory */ size = nla_total_size(sizeof(struct thermal_genl_event)) + nla_total_size(0); @@ -1755,7 +1816,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) memset(thermal_event, 0, sizeof(struct thermal_genl_event)); - thermal_event->orig = orig; + thermal_event->orig = tz->id; thermal_event->event = event; /* send multicast genetlink message */ @@ -1767,7 +1828,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) result = genlmsg_multicast(skb, 0, thermal_event_mcgrp.id, GFP_ATOMIC); if (result) - pr_info("failed to send netlink event:%d\n", result); + dev_err(&tz->device, "Failed to send netlink event:%d", result); return result; } @@ -1807,6 +1868,7 @@ static int __init thermal_init(void) idr_destroy(&thermal_cdev_idr); mutex_destroy(&thermal_idr_lock); mutex_destroy(&thermal_list_lock); + return result; } result = genetlink_init(); return result; diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 5110f36..c8b9262 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -369,26 +369,15 @@ static void uio_dev_del_attributes(struct uio_device *idev) static int uio_get_minor(struct uio_device *idev) { int retval = -ENOMEM; - int id; mutex_lock(&minor_lock); - if (idr_pre_get(&uio_idr, GFP_KERNEL) == 0) - goto exit; - - retval = idr_get_new(&uio_idr, idev, &id); - if (retval < 0) { - if (retval == -EAGAIN) - retval = -ENOMEM; - goto exit; - } - if (id < UIO_MAX_DEVICES) { - idev->minor = id; - } else { + retval = idr_alloc(&uio_idr, idev, 0, UIO_MAX_DEVICES, GFP_KERNEL); + if (retval >= 0) { + idev->minor = retval; + } else if (retval == -ENOSPC) { dev_err(idev->dev, "too many uio devices\n"); retval = -EINVAL; - idr_remove(&uio_idr, id); } -exit: mutex_unlock(&minor_lock); return retval; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 28e2d5b..fcc12f3 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -139,23 +139,8 @@ EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); */ static int vfio_alloc_group_minor(struct vfio_group *group) { - int ret, minor; - -again: - if (unlikely(idr_pre_get(&vfio.group_idr, GFP_KERNEL) == 0)) - return -ENOMEM; - /* index 0 is used by /dev/vfio/vfio */ - ret = idr_get_new_above(&vfio.group_idr, group, 1, &minor); - if (ret == -EAGAIN) - goto again; - if (ret || minor > MINORMASK) { - if (minor > MINORMASK) - idr_remove(&vfio.group_idr, minor); - return -ENOSPC; - } - - return minor; + return idr_alloc(&vfio.group_idr, group, 1, MINORMASK + 1, GFP_KERNEL); } static void vfio_free_group_minor(int minor) diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index be27b55..db10d01 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -384,6 +384,12 @@ config BACKLIGHT_LP855X This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557 backlight driver. +config BACKLIGHT_LP8788 + tristate "Backlight driver for TI LP8788 MFD" + depends on BACKLIGHT_CLASS_DEVICE && MFD_LP8788 + help + This supports TI LP8788 backlight driver. + config BACKLIGHT_OT200 tristate "Backlight driver for ot200 visualisation device" depends on BACKLIGHT_CLASS_DEVICE && CS5535_MFGPT && GPIO_CS5535 diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 4606c21..96c4d62 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_BACKLIGHT_LM3630) += lm3630_bl.o obj-$(CONFIG_BACKLIGHT_LM3639) += lm3639_bl.o obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o obj-$(CONFIG_BACKLIGHT_LP855X) += lp855x_bl.o +obj-$(CONFIG_BACKLIGHT_LP8788) += lp8788_bl.o obj-$(CONFIG_BACKLIGHT_MAX8925) += max8925_bl.o obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o obj-$(CONFIG_BACKLIGHT_OT200) += ot200_bl.o diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c index d29e494..c02aa2c 100644 --- a/drivers/video/backlight/ams369fg06.c +++ b/drivers/video/backlight/ams369fg06.c @@ -317,10 +317,7 @@ static int ams369fg06_power_on(struct ams369fg06 *lcd) pd = lcd->lcd_pd; bd = lcd->bd; - if (!pd->power_on) { - dev_err(lcd->dev, "power_on is NULL.\n"); - return -EINVAL; - } else { + if (pd->power_on) { pd->power_on(lcd->ld, 1); msleep(pd->power_on_delay); } @@ -370,7 +367,8 @@ static int ams369fg06_power_off(struct ams369fg06 *lcd) msleep(pd->power_off_delay); - pd->power_on(lcd->ld, 0); + if (pd->power_on) + pd->power_on(lcd->ld, 0); return 0; } diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c new file mode 100644 index 0000000..4bb8b4f --- /dev/null +++ b/drivers/video/backlight/lp8788_bl.c @@ -0,0 +1,333 @@ +/* + * TI LP8788 MFD - backlight driver + * + * Copyright 2012 Texas Instruments + * + * Author: Milo(Woogyom) Kim <milo.kim@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/backlight.h> +#include <linux/err.h> +#include <linux/mfd/lp8788.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/slab.h> + +/* Register address */ +#define LP8788_BL_CONFIG 0x96 +#define LP8788_BL_EN BIT(0) +#define LP8788_BL_PWM_INPUT_EN BIT(5) +#define LP8788_BL_FULLSCALE_SHIFT 2 +#define LP8788_BL_DIM_MODE_SHIFT 1 +#define LP8788_BL_PWM_POLARITY_SHIFT 6 + +#define LP8788_BL_BRIGHTNESS 0x97 + +#define LP8788_BL_RAMP 0x98 +#define LP8788_BL_RAMP_RISE_SHIFT 4 + +#define MAX_BRIGHTNESS 127 +#define DEFAULT_BL_NAME "lcd-backlight" + +struct lp8788_bl_config { + enum lp8788_bl_ctrl_mode bl_mode; + enum lp8788_bl_dim_mode dim_mode; + enum lp8788_bl_full_scale_current full_scale; + enum lp8788_bl_ramp_step rise_time; + enum lp8788_bl_ramp_step fall_time; + enum pwm_polarity pwm_pol; +}; + +struct lp8788_bl { + struct lp8788 *lp; + struct backlight_device *bl_dev; + struct lp8788_backlight_platform_data *pdata; + enum lp8788_bl_ctrl_mode mode; + struct pwm_device *pwm; +}; + +struct lp8788_bl_config default_bl_config = { + .bl_mode = LP8788_BL_REGISTER_ONLY, + .dim_mode = LP8788_DIM_EXPONENTIAL, + .full_scale = LP8788_FULLSCALE_1900uA, + .rise_time = LP8788_RAMP_8192us, + .fall_time = LP8788_RAMP_8192us, + .pwm_pol = PWM_POLARITY_NORMAL, +}; + +static inline bool is_brightness_ctrl_by_pwm(enum lp8788_bl_ctrl_mode mode) +{ + return (mode == LP8788_BL_COMB_PWM_BASED); +} + +static inline bool is_brightness_ctrl_by_register(enum lp8788_bl_ctrl_mode mode) +{ + return (mode == LP8788_BL_REGISTER_ONLY || + mode == LP8788_BL_COMB_REGISTER_BASED); +} + +static int lp8788_backlight_configure(struct lp8788_bl *bl) +{ + struct lp8788_backlight_platform_data *pdata = bl->pdata; + struct lp8788_bl_config *cfg = &default_bl_config; + int ret; + u8 val; + + /* + * Update chip configuration if platform data exists, + * otherwise use the default settings. + */ + if (pdata) { + cfg->bl_mode = pdata->bl_mode; + cfg->dim_mode = pdata->dim_mode; + cfg->full_scale = pdata->full_scale; + cfg->rise_time = pdata->rise_time; + cfg->fall_time = pdata->fall_time; + cfg->pwm_pol = pdata->pwm_pol; + } + + /* Brightness ramp up/down */ + val = (cfg->rise_time << LP8788_BL_RAMP_RISE_SHIFT) | cfg->fall_time; + ret = lp8788_write_byte(bl->lp, LP8788_BL_RAMP, val); + if (ret) + return ret; + + /* Fullscale current setting */ + val = (cfg->full_scale << LP8788_BL_FULLSCALE_SHIFT) | + (cfg->dim_mode << LP8788_BL_DIM_MODE_SHIFT); + + /* Brightness control mode */ + switch (cfg->bl_mode) { + case LP8788_BL_REGISTER_ONLY: + val |= LP8788_BL_EN; + break; + case LP8788_BL_COMB_PWM_BASED: + case LP8788_BL_COMB_REGISTER_BASED: + val |= LP8788_BL_EN | LP8788_BL_PWM_INPUT_EN | + (cfg->pwm_pol << LP8788_BL_PWM_POLARITY_SHIFT); + break; + default: + dev_err(bl->lp->dev, "invalid mode: %d\n", cfg->bl_mode); + return -EINVAL; + } + + bl->mode = cfg->bl_mode; + + return lp8788_write_byte(bl->lp, LP8788_BL_CONFIG, val); +} + +static void lp8788_pwm_ctrl(struct lp8788_bl *bl, int br, int max_br) +{ + unsigned int period; + unsigned int duty; + struct device *dev; + struct pwm_device *pwm; + + if (!bl->pdata) + return; + + period = bl->pdata->period_ns; + duty = br * period / max_br; + dev = bl->lp->dev; + + /* request PWM device with the consumer name */ + if (!bl->pwm) { + pwm = devm_pwm_get(dev, LP8788_DEV_BACKLIGHT); + if (IS_ERR(pwm)) { + dev_err(dev, "can not get PWM device\n"); + return; + } + + bl->pwm = pwm; + } + + pwm_config(bl->pwm, duty, period); + if (duty) + pwm_enable(bl->pwm); + else + pwm_disable(bl->pwm); +} + +static int lp8788_bl_update_status(struct backlight_device *bl_dev) +{ + struct lp8788_bl *bl = bl_get_data(bl_dev); + enum lp8788_bl_ctrl_mode mode = bl->mode; + + if (bl_dev->props.state & BL_CORE_SUSPENDED) + bl_dev->props.brightness = 0; + + if (is_brightness_ctrl_by_pwm(mode)) { + int brt = bl_dev->props.brightness; + int max = bl_dev->props.max_brightness; + + lp8788_pwm_ctrl(bl, brt, max); + } else if (is_brightness_ctrl_by_register(mode)) { + u8 brt = bl_dev->props.brightness; + + lp8788_write_byte(bl->lp, LP8788_BL_BRIGHTNESS, brt); + } + + return 0; +} + +static int lp8788_bl_get_brightness(struct backlight_device *bl_dev) +{ + return bl_dev->props.brightness; +} + +static const struct backlight_ops lp8788_bl_ops = { + .options = BL_CORE_SUSPENDRESUME, + .update_status = lp8788_bl_update_status, + .get_brightness = lp8788_bl_get_brightness, +}; + +static int lp8788_backlight_register(struct lp8788_bl *bl) +{ + struct backlight_device *bl_dev; + struct backlight_properties props; + struct lp8788_backlight_platform_data *pdata = bl->pdata; + int init_brt; + char *name; + + props.type = BACKLIGHT_PLATFORM; + props.max_brightness = MAX_BRIGHTNESS; + + /* Initial brightness */ + if (pdata) + init_brt = min_t(int, pdata->initial_brightness, + props.max_brightness); + else + init_brt = 0; + + props.brightness = init_brt; + + /* Backlight device name */ + if (!pdata || !pdata->name) + name = DEFAULT_BL_NAME; + else + name = pdata->name; + + bl_dev = backlight_device_register(name, bl->lp->dev, bl, + &lp8788_bl_ops, &props); + if (IS_ERR(bl_dev)) + return PTR_ERR(bl_dev); + + bl->bl_dev = bl_dev; + + return 0; +} + +static void lp8788_backlight_unregister(struct lp8788_bl *bl) +{ + struct backlight_device *bl_dev = bl->bl_dev; + + if (bl_dev) + backlight_device_unregister(bl_dev); +} + +static ssize_t lp8788_get_bl_ctl_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lp8788_bl *bl = dev_get_drvdata(dev); + enum lp8788_bl_ctrl_mode mode = bl->mode; + char *strmode; + + if (is_brightness_ctrl_by_pwm(mode)) + strmode = "PWM based"; + else if (is_brightness_ctrl_by_register(mode)) + strmode = "Register based"; + else + strmode = "Invalid mode"; + + return scnprintf(buf, PAGE_SIZE, "%s\n", strmode); +} + +static DEVICE_ATTR(bl_ctl_mode, S_IRUGO, lp8788_get_bl_ctl_mode, NULL); + +static struct attribute *lp8788_attributes[] = { + &dev_attr_bl_ctl_mode.attr, + NULL, +}; + +static const struct attribute_group lp8788_attr_group = { + .attrs = lp8788_attributes, +}; + +static int lp8788_backlight_probe(struct platform_device *pdev) +{ + struct lp8788 *lp = dev_get_drvdata(pdev->dev.parent); + struct lp8788_bl *bl; + int ret; + + bl = devm_kzalloc(lp->dev, sizeof(struct lp8788_bl), GFP_KERNEL); + if (!bl) + return -ENOMEM; + + bl->lp = lp; + if (lp->pdata) + bl->pdata = lp->pdata->bl_pdata; + + platform_set_drvdata(pdev, bl); + + ret = lp8788_backlight_configure(bl); + if (ret) { + dev_err(lp->dev, "backlight config err: %d\n", ret); + goto err_dev; + } + + ret = lp8788_backlight_register(bl); + if (ret) { + dev_err(lp->dev, "register backlight err: %d\n", ret); + goto err_dev; + } + + ret = sysfs_create_group(&pdev->dev.kobj, &lp8788_attr_group); + if (ret) { + dev_err(lp->dev, "register sysfs err: %d\n", ret); + goto err_sysfs; + } + + backlight_update_status(bl->bl_dev); + + return 0; + +err_sysfs: + lp8788_backlight_unregister(bl); +err_dev: + return ret; +} + +static int lp8788_backlight_remove(struct platform_device *pdev) +{ + struct lp8788_bl *bl = platform_get_drvdata(pdev); + struct backlight_device *bl_dev = bl->bl_dev; + + bl_dev->props.brightness = 0; + backlight_update_status(bl_dev); + sysfs_remove_group(&pdev->dev.kobj, &lp8788_attr_group); + lp8788_backlight_unregister(bl); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver lp8788_bl_driver = { + .probe = lp8788_backlight_probe, + .remove = lp8788_backlight_remove, + .driver = { + .name = LP8788_DEV_BACKLIGHT, + .owner = THIS_MODULE, + }, +}; +module_platform_driver(lp8788_bl_driver); + +MODULE_DESCRIPTION("Texas Instruments LP8788 Backlight Driver"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:lp8788-backlight"); diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index 372c8c0..950d354 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c @@ -157,9 +157,16 @@ static int mxc_w1_remove(struct platform_device *pdev) return 0; } +static struct of_device_id mxc_w1_dt_ids[] = { + { .compatible = "fsl,imx21-owire" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mxc_w1_dt_ids); + static struct platform_driver mxc_w1_driver = { .driver = { - .name = "mxc_w1", + .name = "mxc_w1", + .of_match_table = mxc_w1_dt_ids, }, .probe = mxc_w1_probe, .remove = mxc_w1_remove, diff --git a/drivers/w1/slaves/Kconfig b/drivers/w1/slaves/Kconfig index 6752669..762561f 100644 --- a/drivers/w1/slaves/Kconfig +++ b/drivers/w1/slaves/Kconfig @@ -17,11 +17,16 @@ config W1_SLAVE_SMEM simple 64bit memory rom(ds2401/ds2411/ds1990*) to your wire. config W1_SLAVE_DS2408 - tristate "8-Channel Addressable Switch (IO Expander) 0x29 family support (DS2408)" - help - Say Y here if you want to use a 1-wire + tristate "8-Channel Addressable Switch (IO Expander) 0x29 family support (DS2408)" + help + Say Y here if you want to use a 1-wire + DS2408 8-Channel Addressable Switch device support - DS2408 8-Channel Addressable Switch device support +config W1_SLAVE_DS2413 + tristate "Dual Channel Addressable Switch 0x3a family support (DS2413)" + help + Say Y here if you want to use a 1-wire + DS2413 Dual Channel Addressable Switch device support config W1_SLAVE_DS2423 tristate "Counter 1-wire device (DS2423)" diff --git a/drivers/w1/slaves/Makefile b/drivers/w1/slaves/Makefile index 05188f6..06529f3 100644 --- a/drivers/w1/slaves/Makefile +++ b/drivers/w1/slaves/Makefile @@ -4,7 +4,8 @@ obj-$(CONFIG_W1_SLAVE_THERM) += w1_therm.o obj-$(CONFIG_W1_SLAVE_SMEM) += w1_smem.o -obj-$(CONFIG_W1_SLAVE_DS2408) += w1_ds2408.o +obj-$(CONFIG_W1_SLAVE_DS2408) += w1_ds2408.o +obj-$(CONFIG_W1_SLAVE_DS2413) += w1_ds2413.o obj-$(CONFIG_W1_SLAVE_DS2423) += w1_ds2423.o obj-$(CONFIG_W1_SLAVE_DS2431) += w1_ds2431.o obj-$(CONFIG_W1_SLAVE_DS2433) += w1_ds2433.o diff --git a/drivers/w1/slaves/w1_ds2413.c b/drivers/w1/slaves/w1_ds2413.c new file mode 100644 index 0000000..8297862 --- /dev/null +++ b/drivers/w1/slaves/w1_ds2413.c @@ -0,0 +1,177 @@ +/* + * w1_ds2413.c - w1 family 3a (DS2413) driver + * based on w1_ds2408.c by Jean-Francois Dagenais <dagenaisj@sonatest.com> + * + * Copyright (c) 2013 Mariusz Bialonczyk <manio@skyboo.net> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/device.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/slab.h> + +#include "../w1.h" +#include "../w1_int.h" +#include "../w1_family.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mariusz Bialonczyk <manio@skyboo.net>"); +MODULE_DESCRIPTION("w1 family 3a driver for DS2413 2 Pin IO"); + +#define W1_F3A_RETRIES 3 +#define W1_F3A_FUNC_PIO_ACCESS_READ 0xF5 +#define W1_F3A_FUNC_PIO_ACCESS_WRITE 0x5A +#define W1_F3A_SUCCESS_CONFIRM_BYTE 0xAA + +static ssize_t w1_f3a_read_state( + struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + dev_dbg(&sl->dev, + "Reading %s kobj: %p, off: %0#10x, count: %zu, buff addr: %p", + bin_attr->attr.name, kobj, (unsigned int)off, count, buf); + + if (off != 0) + return 0; + if (!buf) + return -EINVAL; + + mutex_lock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex locked"); + + if (w1_reset_select_slave(sl)) { + mutex_unlock(&sl->master->bus_mutex); + return -EIO; + } + + w1_write_8(sl->master, W1_F3A_FUNC_PIO_ACCESS_READ); + *buf = w1_read_8(sl->master); + + mutex_unlock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex unlocked"); + + /* check for correct complement */ + if ((*buf & 0x0F) != ((~*buf >> 4) & 0x0F)) + return -EIO; + else + return 1; +} + +static ssize_t w1_f3a_write_output( + struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + u8 w1_buf[3]; + unsigned int retries = W1_F3A_RETRIES; + + if (count != 1 || off != 0) + return -EFAULT; + + dev_dbg(&sl->dev, "locking mutex for write_output"); + mutex_lock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex locked"); + + if (w1_reset_select_slave(sl)) + goto error; + + /* according to the DS2413 datasheet the most significant 6 bits + should be set to "1"s, so do it now */ + *buf = *buf | 0xFC; + + while (retries--) { + w1_buf[0] = W1_F3A_FUNC_PIO_ACCESS_WRITE; + w1_buf[1] = *buf; + w1_buf[2] = ~(*buf); + w1_write_block(sl->master, w1_buf, 3); + + if (w1_read_8(sl->master) == W1_F3A_SUCCESS_CONFIRM_BYTE) { + mutex_unlock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex unlocked, retries:%d", retries); + return 1; + } + if (w1_reset_resume_command(sl->master)) + goto error; + } + +error: + mutex_unlock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex unlocked in error, retries:%d", retries); + return -EIO; +} + +#define NB_SYSFS_BIN_FILES 2 +static struct bin_attribute w1_f3a_sysfs_bin_files[NB_SYSFS_BIN_FILES] = { + { + .attr = { + .name = "state", + .mode = S_IRUGO, + }, + .size = 1, + .read = w1_f3a_read_state, + }, + { + .attr = { + .name = "output", + .mode = S_IRUGO | S_IWUSR | S_IWGRP, + }, + .size = 1, + .write = w1_f3a_write_output, + } +}; + +static int w1_f3a_add_slave(struct w1_slave *sl) +{ + int err = 0; + int i; + + for (i = 0; i < NB_SYSFS_BIN_FILES && !err; ++i) + err = sysfs_create_bin_file( + &sl->dev.kobj, + &(w1_f3a_sysfs_bin_files[i])); + if (err) + while (--i >= 0) + sysfs_remove_bin_file(&sl->dev.kobj, + &(w1_f3a_sysfs_bin_files[i])); + return err; +} + +static void w1_f3a_remove_slave(struct w1_slave *sl) +{ + int i; + for (i = NB_SYSFS_BIN_FILES - 1; i >= 0; --i) + sysfs_remove_bin_file(&sl->dev.kobj, + &(w1_f3a_sysfs_bin_files[i])); +} + +static struct w1_family_ops w1_f3a_fops = { + .add_slave = w1_f3a_add_slave, + .remove_slave = w1_f3a_remove_slave, +}; + +static struct w1_family w1_family_3a = { + .fid = W1_FAMILY_DS2413, + .fops = &w1_f3a_fops, +}; + +static int __init w1_f3a_init(void) +{ + return w1_register_family(&w1_family_3a); +} + +static void __exit w1_f3a_exit(void) +{ + w1_unregister_family(&w1_family_3a); +} + +module_init(w1_f3a_init); +module_exit(w1_f3a_exit); diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h index a1f0ce1..625dd08 100644 --- a/drivers/w1/w1_family.h +++ b/drivers/w1/w1_family.h @@ -39,6 +39,7 @@ #define W1_EEPROM_DS2431 0x2D #define W1_FAMILY_DS2760 0x30 #define W1_FAMILY_DS2780 0x32 +#define W1_FAMILY_DS2413 0x3A #define W1_THERM_DS1825 0x3B #define W1_FAMILY_DS2781 0x3D #define W1_THERM_DS28EA00 0x42 |