From e1fafdcbe0e3e769c6a83317dd845bc99b4fe61d Mon Sep 17 00:00:00 2001
From: Dennis Dalessandro <dennis.dalessandro@intel.com>
Date: Mon, 10 Oct 2016 06:14:45 -0700
Subject: IB/rdmavt: rdmavt can handle non aligned page maps

The initial code for rdmavt carried with it a restriction that was a
vestige from the qib driver, that to dma map a page it had to be less
than a page size. This is not the case on modern hardware, both qib and
hfi1 will be just fine with unaligned map requests.

This fixes a 4.8 regression where by an IPoIB transfer of > PAGE_SIZE
will hang because the dma map page call always fails. This was
introduced after commit 5faba5469522 ("IB/ipoib: Report SG feature
regardless of HW UD CSUM capability") added the capability to use SG by
default. Rather than override this, the HW supports it, so allow SG.

Cc: Stable <stable@vger.kernel.org> # 4.8
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
index 01f71ca..f2cefb0 100644
--- a/drivers/infiniband/sw/rdmavt/dma.c
+++ b/drivers/infiniband/sw/rdmavt/dma.c
@@ -90,9 +90,6 @@ static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
 	if (WARN_ON(!valid_dma_direction(direction)))
 		return BAD_DMA_ADDRESS;
 
-	if (offset + size > PAGE_SIZE)
-		return BAD_DMA_ADDRESS;
-
 	addr = (u64)page_address(page);
 	if (addr)
 		addr += offset;
-- 
cgit v0.10.2


From 39eb2795f19233330bc14a8450b4042d784b15a7 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Mon, 10 Oct 2016 06:14:50 -0700
Subject: IB/hfi1: Remove redundant sysfs irq affinity entry

The IRQ affinity entry is not needed after the irq notifier patch has been
added to the hfi1 driver.
The irq affinity settings for SDMA engine should be set using the standard
/proc/irq/<N>/ interface.

Reviewed-by: Jianxin Xiong <jianxin.xiong@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index a26a9a0..67ea85a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -775,75 +775,3 @@ void hfi1_put_proc_affinity(int cpu)
 	}
 	mutex_unlock(&affinity->lock);
 }
-
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
-			   size_t count)
-{
-	struct hfi1_affinity_node *entry;
-	cpumask_var_t mask;
-	int ret, i;
-
-	mutex_lock(&node_affinity.lock);
-	entry = node_affinity_lookup(dd->node);
-
-	if (!entry) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
-	if (!ret) {
-		ret = -ENOMEM;
-		goto unlock;
-	}
-
-	ret = cpulist_parse(buf, mask);
-	if (ret)
-		goto out;
-
-	if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
-		dd_dev_warn(dd, "Invalid CPU mask\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* reset the SDMA interrupt affinity details */
-	init_cpu_mask_set(&entry->def_intr);
-	cpumask_copy(&entry->def_intr.mask, mask);
-
-	/* Reassign the affinity for each SDMA interrupt. */
-	for (i = 0; i < dd->num_msix_entries; i++) {
-		struct hfi1_msix_entry *msix;
-
-		msix = &dd->msix_entries[i];
-		if (msix->type != IRQ_SDMA)
-			continue;
-
-		ret = get_irq_affinity(dd, msix);
-
-		if (ret)
-			break;
-	}
-out:
-	free_cpumask_var(mask);
-unlock:
-	mutex_unlock(&node_affinity.lock);
-	return ret ? ret : strnlen(buf, PAGE_SIZE);
-}
-
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
-{
-	struct hfi1_affinity_node *entry;
-
-	mutex_lock(&node_affinity.lock);
-	entry = node_affinity_lookup(dd->node);
-
-	if (!entry) {
-		mutex_unlock(&node_affinity.lock);
-		return -EINVAL;
-	}
-
-	cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
-	mutex_unlock(&node_affinity.lock);
-	return strnlen(buf, PAGE_SIZE);
-}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index b89ea3c..42e6331 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -102,10 +102,6 @@ int hfi1_get_proc_affinity(int);
 /* Release a CPU used by a user process. */
 void hfi1_put_proc_affinity(int);
 
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf);
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
-			   size_t count);
-
 struct hfi1_affinity_node {
 	int node;
 	struct cpu_mask_set def_intr;
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index edba224..919a547 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -49,7 +49,6 @@
 #include "hfi.h"
 #include "mad.h"
 #include "trace.h"
-#include "affinity.h"
 
 /*
  * Start of per-port congestion control structures and support code
@@ -623,27 +622,6 @@ static ssize_t show_tempsense(struct device *device,
 	return ret;
 }
 
-static ssize_t show_sdma_affinity(struct device *device,
-				  struct device_attribute *attr, char *buf)
-{
-	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-
-	return hfi1_get_sdma_affinity(dd, buf);
-}
-
-static ssize_t store_sdma_affinity(struct device *device,
-				   struct device_attribute *attr,
-				   const char *buf, size_t count)
-{
-	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-
-	return hfi1_set_sdma_affinity(dd, buf, count);
-}
-
 /*
  * end of per-unit (or driver, in some cases, but replicated
  * per unit) functions
@@ -658,8 +636,6 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
 static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity,
-		   store_sdma_affinity);
 
 static struct device_attribute *hfi1_attributes[] = {
 	&dev_attr_hw_rev,
@@ -670,7 +646,6 @@ static struct device_attribute *hfi1_attributes[] = {
 	&dev_attr_boardversion,
 	&dev_attr_tempsense,
 	&dev_attr_chip_reset,
-	&dev_attr_sdma_affinity,
 };
 
 int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
-- 
cgit v0.10.2


From d9ac4555fb2bcd6b794aaa0b39acad81111d9f42 Mon Sep 17 00:00:00 2001
From: Jakub Pawlak <jakub.pawlak@intel.com>
Date: Mon, 10 Oct 2016 06:14:56 -0700
Subject: IB/hfi1: Fix integrity check flags default values

Prevent setting up integrity check flags when module is loaded
with NO_INTEGRITY capability.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Jakub Pawlak <jakub.pawlak@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 7eef11b..3c06d204 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1848,7 +1848,13 @@ extern struct mutex hfi1_mutex;
 static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
 						  u16 ctxt_type)
 {
-	u64 base_sc_integrity =
+	u64 base_sc_integrity;
+
+	/* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+	if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+		return 0;
+
+	base_sc_integrity =
 	SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
 	| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
 	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
@@ -1863,7 +1869,6 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
 	| SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
 	| SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK
 	| SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK
-	| SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
 	| SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK
 	| SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
@@ -1872,18 +1877,23 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
 	else
 		base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
 
-	if (is_ax(dd))
-		/* turn off send-side job key checks - A0 */
-		return base_sc_integrity &
-		       ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+	/* turn on send-side job key checks if !A0 */
+	if (!is_ax(dd))
+		base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
 	return base_sc_integrity;
 }
 
 static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 {
-	u64 base_sdma_integrity =
+	u64 base_sdma_integrity;
+
+	/* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+	if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+		return 0;
+
+	base_sdma_integrity =
 	SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
-	| SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
 	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
 	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
 	| SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
@@ -1895,14 +1905,18 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 	| SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
 	| SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK
 	| SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK
-	| SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
 	| SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK
 	| SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
-	if (is_ax(dd))
-		/* turn off send-side job key checks - A0 */
-		return base_sdma_integrity &
-		       ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+	if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
+		base_sdma_integrity |=
+		SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK;
+
+	/* turn on send-side job key checks if !A0 */
+	if (!is_ax(dd))
+		base_sdma_integrity |=
+			SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
 	return base_sdma_integrity;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 50a3a36..d89b874 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -668,19 +668,12 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
 void set_pio_integrity(struct send_context *sc)
 {
 	struct hfi1_devdata *dd = sc->dd;
-	u64 reg = 0;
 	u32 hw_context = sc->hw_context;
 	int type = sc->type;
 
-	/*
-	 * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if
-	 * we're snooping.
-	 */
-	if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
-	    dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE)
-		reg = hfi1_pkt_default_send_ctxt_mask(dd, type);
-
-	write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg);
+	write_kctxt_csr(dd, hw_context,
+			SC(CHECK_ENABLE),
+			hfi1_pkt_default_send_ctxt_mask(dd, type));
 }
 
 static u32 get_buffers_allocated(struct send_context *sc)
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index fd39bca..9cbe52d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -2009,11 +2009,6 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
 	write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
 }
 
-#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
-(r &= ~SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
-
-#define SET_STATIC_RATE_CONTROL_SMASK(r) \
-(r |= SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
 /*
  * set_sdma_integrity
  *
@@ -2022,19 +2017,9 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
 static void set_sdma_integrity(struct sdma_engine *sde)
 {
 	struct hfi1_devdata *dd = sde->dd;
-	u64 reg;
-
-	if (unlikely(HFI1_CAP_IS_KSET(NO_INTEGRITY)))
-		return;
-
-	reg = hfi1_pkt_base_sdma_integrity(dd);
-
-	if (HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
-		CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
-	else
-		SET_STATIC_RATE_CONTROL_SMASK(reg);
 
-	write_sde_csr(sde, SD(CHECK_ENABLE), reg);
+	write_sde_csr(sde, SD(CHECK_ENABLE),
+		      hfi1_pkt_base_sdma_integrity(dd));
 }
 
 static void init_sdma_regs(
-- 
cgit v0.10.2


From acd7c8fe14938a315f0ac1b92a92375f7226c2fd Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Tue, 25 Oct 2016 08:57:55 -0700
Subject: IB/hfi1: Fix an Oops on pci device force remove

This patch fixes an Oops on device unbind, when the device is used
by a PSM user process. PSM processes access device resources which
are freed on device removal. Similar protection exists in uverbs
in ib_core for Verbs clients, but PSM doesn't use ib_uverbs hence
a separate protection is required for PSM clients.

Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9bf5f23..7992152 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	if (ret)
 		goto bail_free_cntrs;
 
+	init_completion(&dd->user_comp);
+
+	/* The user refcount starts with one to inidicate an active device */
+	atomic_set(&dd->user_refcount, 1);
+
 	goto bail;
 
 bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 677efa0..bd786b7 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 					       struct hfi1_devdata,
 					       user_cdev);
 
+	if (!atomic_inc_not_zero(&dd->user_refcount))
+		return -ENXIO;
+
 	/* Just take a ref now. Not all opens result in a context assign */
 	kobject_get(&dd->kobj);
 
@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 		fd->rec_cpu_num = -1; /* no cpu affinity by default */
 		fd->mm = current->mm;
 		atomic_inc(&fd->mm->mm_count);
-	}
+		fp->private_data = fd;
+	} else {
+		fp->private_data = NULL;
+
+		if (atomic_dec_and_test(&dd->user_refcount))
+			complete(&dd->user_comp);
 
-	fp->private_data = fd;
+		return -ENOMEM;
+	}
 
-	return fd ? 0 : -ENOMEM;
+	return 0;
 }
 
 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 done:
 	mmdrop(fdata->mm);
 	kobject_put(&dd->kobj);
+
+	if (atomic_dec_and_test(&dd->user_refcount))
+		complete(&dd->user_comp);
+
 	kfree(fdata);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 3c06d204..368e96c 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1174,6 +1174,10 @@ struct hfi1_devdata {
 	spinlock_t aspm_lock;
 	/* Number of verbs contexts which have disabled ASPM */
 	atomic_t aspm_disabled_cnt;
+	/* Keeps track of user space clients */
+	atomic_t user_refcount;
+	/* Used to wait for outstanding user space clients before dev removal */
+	struct completion user_comp;
 
 	struct hfi1_affinity *affinity;
 	struct rhashtable sdma_rht;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 60db615..e28a6b6 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1538,12 +1538,31 @@ bail:
 	return ret;
 }
 
+static void wait_for_clients(struct hfi1_devdata *dd)
+{
+	/*
+	 * Remove the device init value and complete the device if there is
+	 * no clients or wait for active clients to finish.
+	 */
+	if (atomic_dec_and_test(&dd->user_refcount))
+		complete(&dd->user_comp);
+
+	wait_for_completion(&dd->user_comp);
+}
+
 static void remove_one(struct pci_dev *pdev)
 {
 	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
 
 	/* close debugfs files before ib unregister */
 	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+
+	/* remove the /dev hfi1 interface */
+	hfi1_device_remove(dd);
+
+	/* wait for existing user space clients to finish */
+	wait_for_clients(dd);
+
 	/* unregister from IB core */
 	hfi1_unregister_ib_device(dd);
 
@@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev)
 	/* wait until all of our (qsfp) queue_work() calls complete */
 	flush_workqueue(ib_wq);
 
-	hfi1_device_remove(dd);
-
 	postinit_cleanup(dd);
 }
 
-- 
cgit v0.10.2


From 83fb4af6800deb4f3d19b297df6148cda5c016de Mon Sep 17 00:00:00 2001
From: Krzysztof Blaszkowski <krzysztof.blaszkowski@intel.com>
Date: Mon, 17 Oct 2016 04:19:24 -0700
Subject: IB/hfi1: Return ENODEV for unsupported PCI device ids.

Clean up device type checking.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Krzysztof Blaszkowski <krzysztof.blaszkowski@intel.com>
Signed-off-by: Tymoteusz Kielan <tymoteusz.kielan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index e28a6b6..baea53f 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1402,7 +1402,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
-	struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
+	struct hfi1_devdata *dd;
 	struct hfi1_pportdata *ppd;
 
 	/* First, lock the non-writable module parameters */
@@ -1461,26 +1461,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto bail;
 
-	/*
-	 * Do device-specific initialization, function table setup, dd
-	 * allocation, etc.
-	 */
-	switch (ent->device) {
-	case PCI_DEVICE_ID_INTEL0:
-	case PCI_DEVICE_ID_INTEL1:
-		dd = hfi1_init_dd(pdev, ent);
-		break;
-	default:
+	if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+	      ent->device == PCI_DEVICE_ID_INTEL1)) {
 		hfi1_early_err(&pdev->dev,
 			       "Failing on unknown Intel deviceid 0x%x\n",
 			       ent->device);
 		ret = -ENODEV;
+		goto clean_bail;
 	}
 
-	if (IS_ERR(dd))
+	/*
+	 * Do device-specific initialization, function table setup, dd
+	 * allocation, etc.
+	 */
+	dd = hfi1_init_dd(pdev, ent);
+
+	if (IS_ERR(dd)) {
 		ret = PTR_ERR(dd);
-	if (ret)
 		goto clean_bail; /* error already printed */
+	}
 
 	ret = create_workqueues(dd);
 	if (ret)
-- 
cgit v0.10.2


From 4dfe7cceb2bfd98783b4966d7c881a7552932d31 Mon Sep 17 00:00:00 2001
From: Jianxin Xiong <jianxin.xiong@intel.com>
Date: Mon, 17 Oct 2016 04:19:41 -0700
Subject: IB/hfi1: Fix a potential memory leak in hfi1_create_ctxts()

In the function hfi1_create_ctxts the array "dd->rcd" is allocated and
then populated with allocated resources in a loop. Previously, if
error happened during the loop, only resource allocated in the current
iteration would be freed. The array itself would then be freed, leaving
the resources that were allocated in previous iterations and referenced
by the array elements in limbo.

This patch makes sure all allocated resources are freed before freeing
the array "dd->rcd". Also the resource allocation now takes account of
the numa node the device is attached to.

Reviewed-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index baea53f..e27b65d 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -144,6 +144,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		struct hfi1_ctxtdata *rcd;
 
 		ppd = dd->pport + (i % dd->num_pports);
+
+		/* dd->rcd[i] gets assigned inside the callee */
 		rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
 		if (!rcd) {
 			dd_dev_err(dd,
@@ -169,8 +171,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		if (!rcd->sc) {
 			dd_dev_err(dd,
 				   "Unable to allocate kernel send context, failing\n");
-			dd->rcd[rcd->ctxt] = NULL;
-			hfi1_free_ctxtdata(dd, rcd);
 			goto nomem;
 		}
 
@@ -178,9 +178,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		if (ret < 0) {
 			dd_dev_err(dd,
 				   "Failed to setup kernel receive context, failing\n");
-			sc_free(rcd->sc);
-			dd->rcd[rcd->ctxt] = NULL;
-			hfi1_free_ctxtdata(dd, rcd);
 			ret = -EFAULT;
 			goto bail;
 		}
@@ -196,6 +193,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 nomem:
 	ret = -ENOMEM;
 bail:
+	if (dd->rcd) {
+		for (i = 0; i < dd->num_rcv_contexts; ++i)
+			hfi1_free_ctxtdata(dd, dd->rcd[i]);
+	}
 	kfree(dd->rcd);
 	dd->rcd = NULL;
 	return ret;
@@ -216,7 +217,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
 	    dd->num_rcv_contexts - dd->first_user_ctxt)
 		kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
 				 (dd->num_rcv_contexts - dd->first_user_ctxt));
-	rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+	rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
 	if (rcd) {
 		u32 rcvtids, max_entries;
 
-- 
cgit v0.10.2


From eacc830f95c0d8c5cbbda1bdba2ddc8f14bc248d Mon Sep 17 00:00:00 2001
From: Dennis Dalessandro <dennis.dalessandro@intel.com>
Date: Mon, 17 Oct 2016 04:19:52 -0700
Subject: IB/hfi1: Remove leftover snoop references

A few snoop related variables were missed in the snoop/capture removal
to get out of staging. Go back and clean those up too.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 7992152..859341a 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6301,19 +6301,8 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
 	/* leave shared count at zero for both global and VL15 */
 	write_global_credit(dd, vau, vl15buf, 0);
 
-	/* We may need some credits for another VL when sending packets
-	 * with the snoop interface. Dividing it down the middle for VL15
-	 * and VL0 should suffice.
-	 */
-	if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
-		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
-		    << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
-		write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
-		    << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
-	} else {
-		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
-			<< SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
-	}
+	write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
+		  << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
 }
 
 /*
@@ -9915,9 +9904,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
 	u32 mask = ~((1U << ppd->lmc) - 1);
 	u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
 
-	if (dd->hfi1_snoop.mode_flag)
-		dd_dev_info(dd, "Set lid/lmc while snooping");
-
 	c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
 		| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
 	c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 368e96c..93e2fce 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -367,26 +367,6 @@ struct hfi1_packet {
 	u8 etype;
 };
 
-/*
- * Private data for snoop/capture support.
- */
-struct hfi1_snoop_data {
-	int mode_flag;
-	struct cdev cdev;
-	struct device *class_dev;
-	/* protect snoop data */
-	spinlock_t snoop_lock;
-	struct list_head queue;
-	wait_queue_head_t waitq;
-	void *filter_value;
-	int (*filter_callback)(void *hdr, void *data, void *value);
-	u64 dcc_cfg; /* saved value of DCC Cfg register */
-};
-
-/* snoop mode_flag values */
-#define HFI1_PORT_SNOOP_MODE     1U
-#define HFI1_PORT_CAPTURE_MODE   2U
-
 struct rvt_sge_state;
 
 /*
@@ -1104,8 +1084,6 @@ struct hfi1_devdata {
 	char *portcntrnames;
 	size_t portcntrnameslen;
 
-	struct hfi1_snoop_data hfi1_snoop;
-
 	struct err_info_rcvport err_info_rcvport;
 	struct err_info_constraint err_info_rcv_constraint;
 	struct err_info_constraint err_info_xmit_constraint;
@@ -1141,8 +1119,8 @@ struct hfi1_devdata {
 	rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
 
 	/*
-	 * Handlers for outgoing data so that snoop/capture does not
-	 * have to have its hooks in the send path
+	 * Capability to have different send engines simply by changing a
+	 * pointer value.
 	 */
 	send_routine process_pio_send;
 	send_routine process_dma_send;
@@ -1225,8 +1203,6 @@ struct hfi1_devdata *hfi1_lookup(int unit);
 extern u32 hfi1_cpulist_count;
 extern unsigned long *hfi1_cpulist;
 
-extern unsigned int snoop_drop_send;
-extern unsigned int snoop_force_capture;
 int hfi1_init(struct hfi1_devdata *, int);
 int hfi1_count_units(int *npresentp, int *nupp);
 int hfi1_count_active_units(void);
@@ -1561,13 +1537,6 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
 void reset_link_credits(struct hfi1_devdata *dd);
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
-int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-			   u64 pbc);
-int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-			   u64 pbc);
-void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
-			   u64 pbc, const void *from, size_t count);
 int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
 
 static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
@@ -1803,8 +1772,6 @@ int kdeth_process_expected(struct hfi1_packet *packet);
 int kdeth_process_eager(struct hfi1_packet *packet);
 int process_receive_invalid(struct hfi1_packet *packet);
 
-extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
-
 void update_sge(struct rvt_sge_state *ss, u32 length);
 
 /* global module parameter variables */
@@ -1831,9 +1798,6 @@ extern struct mutex hfi1_mutex;
 #define DRIVER_NAME		"hfi1"
 #define HFI1_USER_MINOR_BASE     0
 #define HFI1_TRACE_MINOR         127
-#define HFI1_DIAGPKT_MINOR       128
-#define HFI1_DIAG_MINOR_BASE     129
-#define HFI1_SNOOP_CAPTURE_BASE  200
 #define HFI1_NMINORS             255
 
 #define PCI_VENDOR_ID_INTEL 0x8086
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 11e02b2..f77e59f 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -253,66 +253,6 @@ TRACE_EVENT(hfi1_mmu_invalidate,
 		      )
 	    );
 
-#define SNOOP_PRN \
-	"slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
-	"svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
-
-TRACE_EVENT(snoop_capture,
-	    TP_PROTO(struct hfi1_devdata *dd,
-		     int hdr_len,
-		     struct ib_header *hdr,
-		     int data_len,
-		     void *data),
-	    TP_ARGS(dd, hdr_len, hdr, data_len, data),
-	    TP_STRUCT__entry(
-			     DD_DEV_ENTRY(dd)
-			     __field(u16, slid)
-			     __field(u16, dlid)
-			     __field(u32, qpn)
-			     __field(u8, opcode)
-			     __field(u8, sl)
-			     __field(u16, pkey)
-			     __field(u32, hdr_len)
-			     __field(u32, data_len)
-			     __field(u8, lnh)
-			     __dynamic_array(u8, raw_hdr, hdr_len)
-			     __dynamic_array(u8, raw_pkt, data_len)
-			     ),
-	    TP_fast_assign(
-		struct ib_other_headers *ohdr;
-
-		__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
-		if (__entry->lnh == HFI1_LRH_BTH)
-		ohdr = &hdr->u.oth;
-		else
-		ohdr = &hdr->u.l.oth;
-		DD_DEV_ASSIGN(dd);
-		__entry->slid = be16_to_cpu(hdr->lrh[3]);
-		__entry->dlid = be16_to_cpu(hdr->lrh[1]);
-		__entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
-		__entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
-		__entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
-		__entry->pkey =	be32_to_cpu(ohdr->bth[0]) & 0xffff;
-		__entry->hdr_len = hdr_len;
-		__entry->data_len = data_len;
-		memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
-		memcpy(__get_dynamic_array(raw_pkt), data, data_len);
-		),
-	    TP_printk(
-		"[%s] " SNOOP_PRN,
-		__get_str(dev),
-		__entry->slid,
-		__entry->dlid,
-		__entry->qpn,
-		__entry->opcode,
-		show_ib_opcode(__entry->opcode),
-		__entry->sl,
-		__entry->pkey,
-		__entry->hdr_len,
-		__entry->data_len
-		)
-);
-
 #endif /* __HFI1_TRACE_RX_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v0.10.2


From 26ea2544ddbe8855cb251e41ff3641c61655a15f Mon Sep 17 00:00:00 2001
From: Easwar Hariharan <easwar.hariharan@intel.com>
Date: Mon, 17 Oct 2016 04:19:58 -0700
Subject: IB/hfi1: Clean up unused argument

hfi1_pcie_ddinit takes the PCI device id as an argument but never
uses it. Clean it up.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Easwar Hariharan <easwar.hariharan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 859341a..156ddf8 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -14449,7 +14449,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	 * Any error printing is already done by the init code.
 	 * On return, we have the chip mapped.
 	 */
-	ret = hfi1_pcie_ddinit(dd, pdev, ent);
+	ret = hfi1_pcie_ddinit(dd, pdev);
 	if (ret < 0)
 		goto bail_free;
 
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 93e2fce..d906cf0 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1736,8 +1736,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
 
 int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
 void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *,
-		     const struct pci_device_id *);
+int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
 void hfi1_pcie_flr(struct hfi1_devdata *);
 int pcie_speeds(struct hfi1_devdata *);
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 89c68da..4ac8f33 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -157,8 +157,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev)
  * fields required to re-initialize after a chip reset, or for
  * various other purposes
  */
-int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
-		     const struct pci_device_id *ent)
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 {
 	unsigned long len;
 	resource_size_t addr;
-- 
cgit v0.10.2


From f0f98f74c91c68502e97e0d5526aa4e81b40b28a Mon Sep 17 00:00:00 2001
From: Easwar Hariharan <easwar.hariharan@intel.com>
Date: Mon, 17 Oct 2016 04:20:04 -0700
Subject: IB/hfi1: Delete unused lock

The lock is an unused vestige from qib. Remove it.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Easwar Hariharan <easwar.hariharan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index d906cf0..cc87fd4 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -593,8 +593,6 @@ struct hfi1_pportdata {
 	struct mutex hls_lock;
 	u32 host_link_state;
 
-	spinlock_t            sdma_alllock ____cacheline_aligned_in_smp;
-
 	u32 lstate;	/* logical link state */
 
 	/* these are the "32 bit" regs */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index e27b65d..0f82eeb 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -507,7 +507,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
 	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
 
 	mutex_init(&ppd->hls_lock);
-	spin_lock_init(&ppd->sdma_alllock);
 	spin_lock_init(&ppd->qsfp_info.qsfp_lock);
 
 	ppd->qsfp_info.ppd = ppd;
-- 
cgit v0.10.2


From 458ed666fe14a54dfb6690a1a7f541782d1342c9 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Mon, 17 Oct 2016 04:20:09 -0700
Subject: IB/hfi1: Fix rnr_timer addition

The new s_rnr_timeout was not properly being set and the code was
incorrectly setting a different timer.

Found by code inspection.

Cc: <stable@vger.kernel.org> # 4.7.x
Fixes: 08279d5c9424 ("staging/rdma/hfi1: use new RNR timer")
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 8bc5013..83198a8 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -89,7 +89,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
 
 	lockdep_assert_held(&qp->s_lock);
 	qp->s_flags |= RVT_S_WAIT_RNR;
-	qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+	priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to);
 	add_timer(&priv->s_rnr_timer);
 }
 
-- 
cgit v0.10.2


From 11501ab9df687c6f0852719a5165e16cd3eb3c10 Mon Sep 17 00:00:00 2001
From: Krzysztof Blaszkowski <krzysztof.blaszkowski@intel.com>
Date: Tue, 25 Oct 2016 13:12:11 -0700
Subject: IB/hfi1: Relocate rcvhdrcnt module parameter check.

Validate the rcvhdrcnt module parameter in a single function at module
load time. This allows proper error reporting.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Krzysztof Blaszkowski <krzysztof.blaszkowski@intel.com>
Signed-off-by: Tymoteusz Kielan <tymoteusz.kielan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 0f82eeb..e3b5bc9 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -262,13 +262,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
 		}
 		rcd->eager_base = base * dd->rcv_entries.group_size;
 
-		/* Validate and initialize Rcv Hdr Q variables */
-		if (rcvhdrcnt % HDRQ_INCREMENT) {
-			dd_dev_err(dd,
-				   "ctxt%u: header queue count %d must be divisible by %lu\n",
-				   rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
-			goto bail;
-		}
 		rcd->rcvhdrq_cnt = rcvhdrcnt;
 		rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
 		/*
@@ -1399,6 +1392,29 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 	hfi1_free_devdata(dd);
 }
 
+static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+{
+	if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+		hfi1_early_err(dev, "Receive header queue count too small\n");
+		return -EINVAL;
+	}
+
+	if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+		hfi1_early_err(dev,
+			       "Receive header queue count cannot be greater than %u\n",
+			       HFI1_MAX_HDRQ_EGRBUF_CNT);
+		return -EINVAL;
+	}
+
+	if (thecnt % HDRQ_INCREMENT) {
+		hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
+			       thecnt, HDRQ_INCREMENT);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
@@ -1409,18 +1425,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	HFI1_CAP_LOCK();
 
 	/* Validate some global module parameters */
-	if (rcvhdrcnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
-		hfi1_early_err(&pdev->dev, "Header queue  count too small\n");
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
-		hfi1_early_err(&pdev->dev,
-			       "Receive header queue count cannot be greater than %u\n",
-			       HFI1_MAX_HDRQ_EGRBUF_CNT);
-		ret = -EINVAL;
+	ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+	if (ret)
 		goto bail;
-	}
+
 	/* use the encoding function as a sanitization check */
 	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
 		hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
-- 
cgit v0.10.2


From 505efe3e46d5eaab726295cd023fb86d5b789d00 Mon Sep 17 00:00:00 2001
From: Jakub Pawlak <jakub.pawlak@intel.com>
Date: Tue, 25 Oct 2016 13:12:17 -0700
Subject: IB/hfi1: Fix status error code for unsupported packets

Set the status code BAD_L2 when unsupported type of packet
is received and dropped.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jakub Pawlak <jakub.pawlak@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 9234525..043fd21 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -320,6 +320,9 @@
 /* DC_DC8051_CFG_MODE.GENERAL bits */
 #define DISABLE_SELF_GUID_CHECK 0x2
 
+/* Bad L2 frame error code */
+#define BAD_L2_ERR      0x6
+
 /*
  * Eager buffer minimum and maximum sizes supported by the hardware.
  * All power-of-two sizes in between are supported as well.
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 6563e4d..dadd35e 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1360,12 +1360,25 @@ int process_receive_ib(struct hfi1_packet *packet)
 
 int process_receive_bypass(struct hfi1_packet *packet)
 {
+	struct hfi1_devdata *dd = packet->rcd->dd;
+
 	if (unlikely(rhf_err_flags(packet->rhf)))
 		handle_eflags(packet);
 
-	dd_dev_err(packet->rcd->dd,
+	dd_dev_err(dd,
 		   "Bypass packets are not supported in normal operation. Dropping\n");
-	incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors);
+	incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
+	if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
+		u64 *flits = packet->ebuf;
+
+		if (flits && !(packet->rhf & RHF_LEN_ERR)) {
+			dd->err_info_rcvport.packet_flit1 = flits[0];
+			dd->err_info_rcvport.packet_flit2 =
+				packet->tlen > sizeof(flits[0]) ? flits[1] : 0;
+		}
+		dd->err_info_rcvport.status_and_code |=
+			(OPA_EI_STATUS_SMASK | BAD_L2_ERR);
+	}
 	return RHF_RCV_CONTINUE;
 }
 
-- 
cgit v0.10.2


From f2d8a0b367e735ab157222ce74a5f2481216c878 Mon Sep 17 00:00:00 2001
From: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
Date: Tue, 25 Oct 2016 13:12:23 -0700
Subject: IB/hfi1: Fix ECN processing in prescan_rxq

When processing ECN via the prescan_rxq path, some fields in the packet
structure are passed uninitialized. This can potentially
cause NULL pointer exceptions during ECN handling.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index dadd35e..c5efff2 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -599,7 +599,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 					 dd->rhf_offset;
 		struct rvt_qp *qp;
 		struct ib_header *hdr;
-		struct ib_other_headers *ohdr;
 		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 		u64 rhf = rhf_to_cpu(rhf_addr);
 		u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -615,18 +614,21 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 		if (etype != RHF_RCV_TYPE_IB)
 			goto next;
 
-		hdr = hfi1_get_msgheader(dd, rhf_addr);
+		packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
+		hdr = packet->hdr;
 
 		lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
-		if (lnh == HFI1_LRH_BTH)
-			ohdr = &hdr->u.oth;
-		else if (lnh == HFI1_LRH_GRH)
-			ohdr = &hdr->u.l.oth;
-		else
+		if (lnh == HFI1_LRH_BTH) {
+			packet->ohdr = &hdr->u.oth;
+		} else if (lnh == HFI1_LRH_GRH) {
+			packet->ohdr = &hdr->u.l.oth;
+			packet->rcv_flags |= HFI1_HAS_GRH;
+		} else {
 			goto next; /* just in case */
+		}
 
-		bth1 = be32_to_cpu(ohdr->bth[1]);
+		bth1 = be32_to_cpu(packet->ohdr->bth[1]);
 		is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
 
 		if (!is_ecn)
@@ -646,7 +648,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 
 		/* turn off BECN, FECN */
 		bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
-		ohdr->bth[1] = cpu_to_be32(bth1);
+		packet->ohdr->bth[1] = cpu_to_be32(bth1);
 next:
 		update_ps_mdata(&mdata, rcd);
 	}
-- 
cgit v0.10.2


From 09a7908b1ba616eed349d49058ee909907ee0885 Mon Sep 17 00:00:00 2001
From: Jianxin Xiong <jianxin.xiong@intel.com>
Date: Tue, 25 Oct 2016 13:12:40 -0700
Subject: IB/hfi1: Prevent hardware counter names from being cut off

Increase the size of the buffer that is used to construct per-VL
and per-SDMA counter names.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 156ddf8..24d0820 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -12098,7 +12098,7 @@ static void update_synth_timer(unsigned long opaque)
 	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
 }
 
-#define C_MAX_NAME 13 /* 12 chars + one for /0 */
+#define C_MAX_NAME 16 /* 15 chars + one for /0 */
 static int init_cntrs(struct hfi1_devdata *dd)
 {
 	int i, rcv_ctxts, j;
-- 
cgit v0.10.2


From 2b16056f845207967a32497f41cf92b57849f934 Mon Sep 17 00:00:00 2001
From: Dennis Dalessandro <dennis.dalessandro@intel.com>
Date: Tue, 25 Oct 2016 13:12:46 -0700
Subject: IB/hfi1: Remove incorrect IS_ERR check

Remove IS_ERR check from caching code as the function being called does
not actually return error pointers.

Fixes: f19bd643dbde: "IB/hfi1: Prevent NULL pointer deferences in caching code"
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index a761f80..77697d6 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1144,7 +1144,7 @@ static int pin_vector_pages(struct user_sdma_request *req,
 	rb_node = hfi1_mmu_rb_extract(pq->handler,
 				      (unsigned long)iovec->iov.iov_base,
 				      iovec->iov.iov_len);
-	if (rb_node && !IS_ERR(rb_node))
+	if (rb_node)
 		node = container_of(rb_node, struct sdma_mmu_node, rb);
 	else
 		rb_node = NULL;
-- 
cgit v0.10.2


From 5b810a242c28e1d8d64d718cebe75b79d86a0b2d Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:26 +0300
Subject: IB/uverbs: Fix leak of XRC target QPs

The real QP is destroyed in case of the ref count reaches zero, but
for XRC target QPs this call was missed and caused to QP leaks.

Let's call to destroy for all flows.

Fixes: 0e0ec7e0638e ('RDMA/core: Export ib_open_qp() to share XRC...')
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 0012fa5..44b1104 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -262,12 +262,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 			container_of(uobj, struct ib_uqp_object, uevent.uobject);
 
 		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-		if (qp != qp->real_qp) {
-			ib_close_qp(qp);
-		} else {
+		if (qp == qp->real_qp)
 			ib_uverbs_detach_umcast(qp, uqp);
-			ib_destroy_qp(qp);
-		}
+		ib_destroy_qp(qp);
 		ib_uverbs_release_uevent(file, &uqp->uevent);
 		kfree(uqp);
 	}
-- 
cgit v0.10.2


From 9db0ff53cb9b43ed75bacd42a89c1a0ab048b2b0 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:27 +0300
Subject: IB/cm: Mark stale CM id's whenever the mad agent was unregistered

When there is a CM id object that has port assigned to it, it means that
the cm-id asked for the specific port that it should go by it, but if
that port was removed (hot-unplug event) the cm-id was not updated.
In order to fix that the port keeps a list of all the cm-id's that are
planning to go by it, whenever the port is removed it marks all of them
as invalid.

This commit fixes a kernel panic which happens when running traffic between
guests and we force reboot a guest mid traffic, it triggers a kernel panic:

 Call Trace:
  [<ffffffff815271fa>] ? panic+0xa7/0x16f
  [<ffffffff8152b534>] ? oops_end+0xe4/0x100
  [<ffffffff8104a00b>] ? no_context+0xfb/0x260
  [<ffffffff81084db2>] ? del_timer_sync+0x22/0x30
  [<ffffffff8104a295>] ? __bad_area_nosemaphore+0x125/0x1e0
  [<ffffffff81084240>] ? process_timeout+0x0/0x10
  [<ffffffff8104a363>] ? bad_area_nosemaphore+0x13/0x20
  [<ffffffff8104aabf>] ? __do_page_fault+0x31f/0x480
  [<ffffffff81065df0>] ? default_wake_function+0x0/0x20
  [<ffffffffa0752675>] ? free_msg+0x55/0x70 [mlx5_core]
  [<ffffffffa0753434>] ? cmd_exec+0x124/0x840 [mlx5_core]
  [<ffffffff8105a924>] ? find_busiest_group+0x244/0x9f0
  [<ffffffff8152d45e>] ? do_page_fault+0x3e/0xa0
  [<ffffffff8152a815>] ? page_fault+0x25/0x30
  [<ffffffffa024da25>] ? cm_alloc_msg+0x35/0xc0 [ib_cm]
  [<ffffffffa024e821>] ? ib_send_cm_dreq+0xb1/0x1e0 [ib_cm]
  [<ffffffffa024f836>] ? cm_destroy_id+0x176/0x320 [ib_cm]
  [<ffffffffa024fb00>] ? ib_destroy_cm_id+0x10/0x20 [ib_cm]
  [<ffffffffa034f527>] ? ipoib_cm_free_rx_reap_list+0xa7/0x110 [ib_ipoib]
  [<ffffffffa034f590>] ? ipoib_cm_rx_reap+0x0/0x20 [ib_ipoib]
  [<ffffffffa034f5a5>] ? ipoib_cm_rx_reap+0x15/0x20 [ib_ipoib]
  [<ffffffff81094d20>] ? worker_thread+0x170/0x2a0
  [<ffffffff8109b2a0>] ? autoremove_wake_function+0x0/0x40
  [<ffffffff81094bb0>] ? worker_thread+0x0/0x2a0
  [<ffffffff8109aef6>] ? kthread+0x96/0xa0
  [<ffffffff8100c20a>] ? child_rip+0xa/0x20
  [<ffffffff8109ae60>] ? kthread+0x0/0xa0
  [<ffffffff8100c200>] ? child_rip+0x0/0x20

Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation")
Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c995255..71c7c4c 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -80,6 +80,8 @@ static struct ib_cm {
 	__be32 random_id_operand;
 	struct list_head timewait_list;
 	struct workqueue_struct *wq;
+	/* Sync on cm change port state */
+	spinlock_t state_lock;
 } cm;
 
 /* Counter indexes ordered by attribute ID */
@@ -161,6 +163,8 @@ struct cm_port {
 	struct ib_mad_agent *mad_agent;
 	struct kobject port_obj;
 	u8 port_num;
+	struct list_head cm_priv_prim_list;
+	struct list_head cm_priv_altr_list;
 	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
 };
 
@@ -241,6 +245,12 @@ struct cm_id_private {
 	u8 service_timeout;
 	u8 target_ack_delay;
 
+	struct list_head prim_list;
+	struct list_head altr_list;
+	/* Indicates that the send port mad is registered and av is set */
+	int prim_send_port_not_ready;
+	int altr_send_port_not_ready;
+
 	struct list_head work_list;
 	atomic_t work_count;
 };
@@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	struct ib_mad_agent *mad_agent;
 	struct ib_mad_send_buf *m;
 	struct ib_ah *ah;
+	struct cm_av *av;
+	unsigned long flags, flags2;
+	int ret = 0;
 
+	/* don't let the port to be released till the agent is down */
+	spin_lock_irqsave(&cm.state_lock, flags2);
+	spin_lock_irqsave(&cm.lock, flags);
+	if (!cm_id_priv->prim_send_port_not_ready)
+		av = &cm_id_priv->av;
+	else if (!cm_id_priv->altr_send_port_not_ready &&
+		 (cm_id_priv->alt_av.port))
+		av = &cm_id_priv->alt_av;
+	else {
+		pr_info("%s: not valid CM id\n", __func__);
+		ret = -ENODEV;
+		spin_unlock_irqrestore(&cm.lock, flags);
+		goto out;
+	}
+	spin_unlock_irqrestore(&cm.lock, flags);
+	/* Make sure the port haven't released the mad yet */
 	mad_agent = cm_id_priv->av.port->mad_agent;
-	ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
-	if (IS_ERR(ah))
-		return PTR_ERR(ah);
+	if (!mad_agent) {
+		pr_info("%s: not a valid MAD agent\n", __func__);
+		ret = -ENODEV;
+		goto out;
+	}
+	ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
+		goto out;
+	}
 
 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
-			       cm_id_priv->av.pkey_index,
+			       av->pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC,
 			       IB_MGMT_BASE_VERSION);
 	if (IS_ERR(m)) {
 		ib_destroy_ah(ah);
-		return PTR_ERR(m);
+		ret = PTR_ERR(m);
+		goto out;
 	}
 
 	/* Timeout set by caller if response is expected. */
@@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	atomic_inc(&cm_id_priv->refcount);
 	m->context[0] = cm_id_priv;
 	*msg = m;
-	return 0;
+
+out:
+	spin_unlock_irqrestore(&cm.state_lock, flags2);
+	return ret;
 }
 
 static int cm_alloc_response_msg(struct cm_port *port,
@@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
 			   grh, &av->ah_attr);
 }
 
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+			      struct cm_id_private *cm_id_priv)
 {
 	struct cm_device *cm_dev;
 	struct cm_port *port = NULL;
@@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 			     &av->ah_attr);
 	av->timeout = path->packet_life_time + 1;
 
-	return 0;
+	spin_lock_irqsave(&cm.lock, flags);
+	if (&cm_id_priv->av == av)
+		list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+	else if (&cm_id_priv->alt_av == av)
+		list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+	else
+		ret = -EINVAL;
+
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	return ret;
 }
 
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
 	spin_lock_init(&cm_id_priv->lock);
 	init_completion(&cm_id_priv->comp);
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	INIT_LIST_HEAD(&cm_id_priv->prim_list);
+	INIT_LIST_HEAD(&cm_id_priv->altr_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	atomic_set(&cm_id_priv->refcount, 1);
 	return &cm_id_priv->id;
@@ -892,6 +945,15 @@ retest:
 		break;
 	}
 
+	spin_lock_irq(&cm.lock);
+	if (!list_empty(&cm_id_priv->altr_list) &&
+	    (!cm_id_priv->altr_send_port_not_ready))
+		list_del(&cm_id_priv->altr_list);
+	if (!list_empty(&cm_id_priv->prim_list) &&
+	    (!cm_id_priv->prim_send_port_not_ready))
+		list_del(&cm_id_priv->prim_list);
+	spin_unlock_irq(&cm.lock);
+
 	cm_free_id(cm_id->local_id);
 	cm_deref_id(cm_id_priv);
 	wait_for_completion(&cm_id_priv->comp);
@@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+				 cm_id_priv);
 	if (ret)
 		goto error1;
 	if (param->alternate_path) {
 		ret = cm_init_av_by_path(param->alternate_path,
-					 &cm_id_priv->alt_av);
+					 &cm_id_priv->alt_av, cm_id_priv);
 		if (ret)
 			goto error1;
 	}
@@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work)
 			dev_put(gid_attr.ndev);
 		}
 		work->path[0].gid_type = gid_attr.gid_type;
-		ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+		ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+					 cm_id_priv);
 	}
 	if (ret) {
 		int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work)
 		goto rejected;
 	}
 	if (req_msg->alt_local_lid) {
-		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
+					 cm_id_priv);
 		if (ret) {
 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
 				       &work->path[0].sgid,
@@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+				 cm_id_priv);
 	if (ret)
 		goto out;
 	cm_id_priv->alt_av.timeout =
@@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work)
 	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
 				work->mad_recv_wc->recv_buf.grh,
 				&cm_id_priv->av);
-	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+			   cm_id_priv);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 		return -EINVAL;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+	ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
 	if (ret)
 		goto out;
 
@@ -3468,7 +3535,9 @@ out:
 static int cm_migrate(struct ib_cm_id *cm_id)
 {
 	struct cm_id_private *cm_id_priv;
+	struct cm_av tmp_av;
 	unsigned long flags;
+	int tmp_send_port_not_ready;
 	int ret = 0;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id)
 	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
 	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
 		cm_id->lap_state = IB_CM_LAP_IDLE;
+		/* Swap address vector */
+		tmp_av = cm_id_priv->av;
 		cm_id_priv->av = cm_id_priv->alt_av;
+		cm_id_priv->alt_av = tmp_av;
+		/* Swap port send ready state */
+		tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
+		cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
+		cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
 	} else
 		ret = -EINVAL;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device)
 		port->cm_dev = cm_dev;
 		port->port_num = i;
 
+		INIT_LIST_HEAD(&port->cm_priv_prim_list);
+		INIT_LIST_HEAD(&port->cm_priv_altr_list);
+
 		ret = cm_create_port_fs(port);
 		if (ret)
 			goto error1;
@@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct cm_device *cm_dev = client_data;
 	struct cm_port *port;
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_agent *cur_mad_agent;
 	struct ib_port_modify port_modify = {
 		.clr_port_cap_mask = IB_PORT_CM_SUP
 	};
@@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 
 		port = cm_dev->port[i-1];
 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+		/* Mark all the cm_id's as not valid */
+		spin_lock_irq(&cm.lock);
+		list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
+			cm_id_priv->altr_send_port_not_ready = 1;
+		list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
+			cm_id_priv->prim_send_port_not_ready = 1;
+		spin_unlock_irq(&cm.lock);
 		/*
 		 * We flush the queue here after the going_down set, this
 		 * verify that no new works will be queued in the recv handler,
 		 * after that we can call the unregister_mad_agent
 		 */
 		flush_workqueue(cm.wq);
-		ib_unregister_mad_agent(port->mad_agent);
+		spin_lock_irq(&cm.state_lock);
+		cur_mad_agent = port->mad_agent;
+		port->mad_agent = NULL;
+		spin_unlock_irq(&cm.state_lock);
+		ib_unregister_mad_agent(cur_mad_agent);
 		cm_remove_port_fs(port);
 	}
+
 	device_unregister(cm_dev->device);
 	kfree(cm_dev);
 }
@@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void)
 	INIT_LIST_HEAD(&cm.device_list);
 	rwlock_init(&cm.device_lock);
 	spin_lock_init(&cm.lock);
+	spin_lock_init(&cm.state_lock);
 	cm.listen_service_table = RB_ROOT;
 	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
 	cm.remote_id_table = RB_ROOT;
-- 
cgit v0.10.2


From aeb76df46d1158d5f7f3d30f993a1bb6ee9c67a0 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Mon, 31 Oct 2016 07:50:56 +0200
Subject: IB/core: Set routable RoCE gid type for ipv4/ipv6 networks

On Thu, Oct 27, 2016 at 04:36:28PM +0300, Leon Romanovsky wrote:
> From: Mark Bloch <markb@mellanox.com>
>
> If the underlying netowrk type is ipv4 or ipv6 and the device supports
> routable RoCE, prefer it so the traffic could cross subnets.
>
> Signed-off-by: Mark Bloch <markb@mellanox.com>
> Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
> Signed-off-by: Leon Romanovsky <leon@kernel.org>
> ---

Hi Doug,

Please take the following v1 of this patch where I fixed spelling error
from "netowrk" to be "network".

Thanks.

>From 09f96ba3e9b4442cfb44dca04c6726e55525c9c3 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Sun, 11 Sep 2016 06:25:10 +0000
Subject: [PATCH rdma-rc v1 3/6] IB/core: Set routable RoCE gid type for ipv4/ipv6
 networks

If the underlying network type is ipv4 or ipv6 and the device supports
routable RoCE, prefer it so the traffic could cross subnets.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 36bf50e..9ca0da0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2436,6 +2436,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
 	return 0;
 }
 
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+					   unsigned long supported_gids,
+					   enum ib_gid_type default_gid)
+{
+	if ((network_type == RDMA_NETWORK_IPV4 ||
+	     network_type == RDMA_NETWORK_IPV6) &&
+	    test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+		return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+	return default_gid;
+}
+
 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 {
 	struct rdma_route *route = &id_priv->id.route;
@@ -2461,6 +2473,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	route->num_paths = 1;
 
 	if (addr->dev_addr.bound_dev_if) {
+		unsigned long supported_gids;
+
 		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
 		if (!ndev) {
 			ret = -ENODEV;
@@ -2484,7 +2498,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
 		route->path_rec->net = &init_net;
 		route->path_rec->ifindex = ndev->ifindex;
-		route->path_rec->gid_type = id_priv->gid_type;
+		supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+							    id_priv->id.port_num);
+		route->path_rec->gid_type =
+			cma_route_gid_type(addr->dev_addr.network,
+					   supported_gids,
+					   id_priv->gid_type);
 	}
 	if (!ndev) {
 		ret = -ENODEV;
-- 
cgit v0.10.2


From 61c3702863be9e9f1ef12ed5a5b17bae6cdfac0b Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:29 +0300
Subject: IB/core: Add missing check for addr_resolve callback return value

When calling rdma_resolve_ip inside rdma_addr_find_l2_eth_by_grh,
the return status of the request was ignored in the callback function
causing a successful return and an empty dmac.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Alex Vesker <valex@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index b136d3a..0f58f46 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel);
 struct resolve_cb_context {
 	struct rdma_dev_addr *addr;
 	struct completion comp;
+	int status;
 };
 
 static void resolve_cb(int status, struct sockaddr *src_addr,
 	     struct rdma_dev_addr *addr, void *context)
 {
-	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
-				rdma_dev_addr));
+	if (!status)
+		memcpy(((struct resolve_cb_context *)context)->addr,
+		       addr, sizeof(struct rdma_dev_addr));
+	((struct resolve_cb_context *)context)->status = status;
 	complete(&((struct resolve_cb_context *)context)->comp);
 }
 
@@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 
 	wait_for_completion(&ctx.comp);
 
+	ret = ctx.status;
+	if (ret)
+		return ret;
+
 	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
 	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
 	if (!dev)
-- 
cgit v0.10.2


From 3c7ba5760ab8eedec01159b267bb9bfcffe522ac Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:31 +0300
Subject: IB/core: Avoid unsigned int overflow in sg_alloc_table

sg_alloc_table gets unsigned int as parameter while the driver
returns it as size_t. Check npages isn't greater than maximum
unsigned int.

Fixes: eeb8461e36c9 ("IB: Refactor umem to use linear SG table")
Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 224ad27..84b4eff 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -175,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	cur_base = addr & PAGE_MASK;
 
-	if (npages == 0) {
+	if (npages == 0 || npages > UINT_MAX) {
 		ret = -EINVAL;
 		goto out;
 	}
-- 
cgit v0.10.2


From 90be7c8ab72853ff9fc407f01518a898df1f3045 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:39 +0300
Subject: IB/mlx5: Fix memory leak in query device

We need to free dev->port when we fail to enable RoCE or
initialize node data.

Fixes: 0837e86a7a34 ('IB/mlx5: Add per port counters')
Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2217477..bb61487 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3115,7 +3115,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	}
 	err = init_node_data(dev);
 	if (err)
-		goto err_dealloc;
+		goto err_free_port;
 
 	mutex_init(&dev->flow_db.lock);
 	mutex_init(&dev->cap_mask_mutex);
@@ -3125,7 +3125,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	if (ll == IB_LINK_LAYER_ETHERNET) {
 		err = mlx5_enable_roce(dev);
 		if (err)
-			goto err_dealloc;
+			goto err_free_port;
 	}
 
 	err = create_dev_resources(&dev->devr);
-- 
cgit v0.10.2


From efd7f40082a0dfd112eb87ff2124467a5739216f Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:40 +0300
Subject: IB/mlx5: Validate requested RQT size

Validate that the requested size of RQT is supported by firmware.

Fixes: c5f9092936fe ('IB/mlx5: Add Receive Work Queue Indirection table operations')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 41f4c2a..2be0d06 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -4815,6 +4815,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
 				 udata->inlen))
 		return ERR_PTR(-EOPNOTSUPP);
 
+	if (init_attr->log_ind_tbl_size >
+	    MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
+		mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
+			    init_attr->log_ind_tbl_size,
+			    MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
+		return ERR_PTR(-EINVAL);
+	}
+
 	min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
 	if (udata->outlen && udata->outlen < min_resp_len)
 		return ERR_PTR(-EINVAL);
-- 
cgit v0.10.2


From 16b0e0695a73b68d8ca40288c8f9614ef208917b Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:41 +0300
Subject: IB/mlx5: Use cache line size to select CQE stride

When creating kernel CQs use 128B CQE stride if the
cache line size is 128B, 64B otherwise.  This prevents
multiple CQEs from residing in a 128B cache line,
which can cause retries when there are concurrent
read and writes in one cache line.

Tested with IPoIB on PPC64, saw ~5% throughput
improvement.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 79d017b..fcd04b8 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -932,8 +932,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 		if (err)
 			goto err_create;
 	} else {
-		/* for now choose 64 bytes till we have a proper interface */
-		cqe_size = 64;
+		cqe_size = cache_line_size() == 128 ? 128 : 64;
 		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
 				       &index, &inlen);
 		if (err)
-- 
cgit v0.10.2


From 6bc1a656ab9f57f0112823b4a36930c9a29d1f89 Mon Sep 17 00:00:00 2001
From: Moshe Lazer <moshel@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:42 +0300
Subject: IB/mlx5: Resolve soft lock on massive reg MRs

When calling reg_mr of large MRs (e.g. 4GB) from multiple processes
and MR caches can't supply the required amount of MRs the slow-path
of MR allocation may be used. In this case we need to serialize the
slow-path between the processes to avoid soft lock.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Moshe Lazer <moshel@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index dcdcd19..7d68990 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -626,6 +626,8 @@ struct mlx5_ib_dev {
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
 	struct timer_list		delay_timer;
+	/* Prevents soft lock on massive reg MRs */
+	struct mutex			slow_path_mutex;
 	int				fill_delay;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	struct ib_odp_caps	odp_caps;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d4ad672..4e90124 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -610,6 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 	int err;
 	int i;
 
+	mutex_init(&dev->slow_path_mutex);
 	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
 	if (!cache->wq) {
 		mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -1182,9 +1183,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		goto error;
 	}
 
-	if (!mr)
+	if (!mr) {
+		mutex_lock(&dev->slow_path_mutex);
 		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
 				page_shift, access_flags);
+		mutex_unlock(&dev->slow_path_mutex);
+	}
 
 	if (IS_ERR(mr)) {
 		err = PTR_ERR(mr);
-- 
cgit v0.10.2


From dbaaff2a2caa03d472b5cc53a3fbfd415c97dc26 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:44 +0300
Subject: IB/mlx5: Fix fatal error dispatching

When an internal error condition is detected, make sure to set the
device inactive after dispatching the event so ULPs can get a
notification of this event.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index bb61487..a014ad3 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2311,14 +2311,14 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 {
 	struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
 	struct ib_event ibev;
-
+	bool fatal = false;
 	u8 port = 0;
 
 	switch (event) {
 	case MLX5_DEV_EVENT_SYS_ERROR:
-		ibdev->ib_active = false;
 		ibev.event = IB_EVENT_DEVICE_FATAL;
 		mlx5_ib_handle_internal_error(ibdev);
+		fatal = true;
 		break;
 
 	case MLX5_DEV_EVENT_PORT_UP:
@@ -2370,6 +2370,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 
 	if (ibdev->ib_active)
 		ib_dispatch_event(&ibev);
+
+	if (fatal)
+		ibdev->ib_active = false;
 }
 
 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
-- 
cgit v0.10.2


From a1ab8402d15d2305d2315d96ec3294bfdf16587e Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 27 Oct 2016 16:36:46 +0300
Subject: IB/mlx5: Fix NULL pointer dereference on debug print

For XRC QP CQs may not exist. Check before attempting dereference.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 2be0d06..59c4c89 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2052,8 +2052,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 
 		mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
 			    qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
-			    to_mcq(init_attr->recv_cq)->mcq.cqn,
-			    to_mcq(init_attr->send_cq)->mcq.cqn);
+			    init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
+			    init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
 
 		qp->trans_qp.xrcdn = xrcdn;
 
-- 
cgit v0.10.2


From 37995116fecfce2b61ee3da6e73b3e394c6818f9 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Thu, 10 Nov 2016 11:30:54 +0200
Subject: IB/mlx4: Check gid_index return value

Check the returned GID index value and return an error if it is invalid.

Fixes: 5070cd2239bd ('IB/mlx4: Replace mechanism for RoCE GID management')
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 5fc6233..b9bf075 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -102,7 +102,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (ah_attr->sl & 7) << 13;
 	ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
-	ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+	ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	ah->av.eth.gid_index = ret;
 	ah->av.eth.vlan = cpu_to_be16(vlan_tag);
 	ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
 	if (ah_attr->static_rate) {
-- 
cgit v0.10.2


From 593ff73bcfdc79f79a8a0df55504f75ad3e5d1a9 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 10 Nov 2016 11:30:55 +0200
Subject: IB/mlx4: Fix create CQ error flow

Currently, if ib_copy_to_udata fails, the CQ
won't be deleted from the radix tree and the HW (HW2SW).

Fixes: 225c7b1feef1 ('IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters')
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 1ea686b..6a0fec3 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -253,11 +253,14 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 	if (context)
 		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
 			err = -EFAULT;
-			goto err_dbmap;
+			goto err_cq_free;
 		}
 
 	return &cq->ibcq;
 
+err_cq_free:
+	mlx4_cq_free(dev->dev, &cq->mcq);
+
 err_dbmap:
 	if (context)
 		mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
-- 
cgit v0.10.2


From 1454ca3a97e147bb91e98b087446c39cf6692a48 Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Wed, 16 Nov 2016 10:39:14 +0200
Subject: IB/rxe: Fix kernel panic in UDP tunnel with GRO and RX checksum

Missing initialization of udp_tunnel_sock_cfg causes to following
kernel panic, while kernel tries to execute gro_receive().

While being there, we converted udp_port_cfg to use the same
initialization scheme as udp_tunnel_sock_cfg.

------------[ cut here ]------------
kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
BUG: unable to handle kernel paging request at ffffffffa0588c50
IP: [<ffffffffa0588c50>] __this_module+0x50/0xffffffffffff8400 [ib_rxe]
PGD 1c09067 PUD 1c0a063 PMD bb394067 PTE 80000000ad5e8163
Oops: 0011 [#1] SMP
Modules linked in: ib_rxe ip6_udp_tunnel udp_tunnel
CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.7.0-rc3+ #2
Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011
task: ffff880235e4e680 ti: ffff880235e68000 task.ti: ffff880235e68000
RIP: 0010:[<ffffffffa0588c50>]
[<ffffffffa0588c50>] __this_module+0x50/0xffffffffffff8400 [ib_rxe]
RSP: 0018:ffff880237343c80  EFLAGS: 00010282
RAX: 00000000dffe482d RBX: ffff8800ae330900 RCX: 000000002001b712
RDX: ffff8800ae330900 RSI: ffff8800ae102578 RDI: ffff880235589c00
RBP: ffff880237343cb0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ae33e262
R13: ffff880235589c00 R14: 0000000000000014 R15: ffff8800ae102578
FS:  0000000000000000(0000) GS:ffff880237340000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffa0588c50 CR3: 0000000001c06000 CR4: 00000000000006e0
Stack:
ffffffff8160860e ffff8800ae330900 ffff8800ae102578 0000000000000014
000000000000004e ffff8800ae102578 ffff880237343ce0 ffffffff816088fb
0000000000000000 ffff8800ae330900 0000000000000000 00000000ffad0000
Call Trace:
<IRQ>
[<ffffffff8160860e>] ? udp_gro_receive+0xde/0x130
[<ffffffff816088fb>] udp4_gro_receive+0x10b/0x2d0
[<ffffffff81611373>] inet_gro_receive+0x1d3/0x270
[<ffffffff81594e29>] dev_gro_receive+0x269/0x3b0
[<ffffffff81595188>] napi_gro_receive+0x38/0x120
[<ffffffffa011caee>] mlx5e_handle_rx_cqe+0x27e/0x340 [mlx5_core]
[<ffffffffa011d076>] mlx5e_poll_rx_cq+0x66/0x6d0 [mlx5_core]
[<ffffffffa011d7ae>] mlx5e_napi_poll+0x8e/0x400 [mlx5_core]
[<ffffffff815949a0>] net_rx_action+0x160/0x380
[<ffffffff816a9197>] __do_softirq+0xd7/0x2c5
[<ffffffff81085c35>] irq_exit+0xf5/0x100
[<ffffffff816a8f16>] do_IRQ+0x56/0xd0
[<ffffffff816a6dcc>] common_interrupt+0x8c/0x8c
<EOI>
[<ffffffff81061f96>] ? native_safe_halt+0x6/0x10
[<ffffffff81037ade>] default_idle+0x1e/0xd0
[<ffffffff8103828f>] arch_cpu_idle+0xf/0x20
[<ffffffff810c37dc>] default_idle_call+0x3c/0x50
[<ffffffff810c3b13>] cpu_startup_entry+0x323/0x3c0
[<ffffffff81050d8c>] start_secondary+0x15c/0x1a0
RIP  [<ffffffffa0588c50>] __this_module+0x50/0xffffffffffff8400 [ib_rxe]
RSP <ffff880237343c80>
CR2: ffffffffa0588c50
---[ end trace 489ee31fa7614ac5 ]---
Kernel panic - not syncing: Fatal exception in interrupt
Kernel Offset: disabled
---[ end Kernel panic - not syncing: Fatal exception in interrupt
------------[ cut here ]------------

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index b8258e4..ffff5a5 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -243,10 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
 {
 	int err;
 	struct socket *sock;
-	struct udp_port_cfg udp_cfg;
-	struct udp_tunnel_sock_cfg tnl_cfg;
-
-	memset(&udp_cfg, 0, sizeof(udp_cfg));
+	struct udp_port_cfg udp_cfg = {0};
+	struct udp_tunnel_sock_cfg tnl_cfg = {0};
 
 	if (ipv6) {
 		udp_cfg.family = AF_INET6;
@@ -264,10 +262,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
 		return ERR_PTR(err);
 	}
 
-	tnl_cfg.sk_user_data = NULL;
 	tnl_cfg.encap_type = 1;
 	tnl_cfg.encap_rcv = rxe_udp_encap_recv;
-	tnl_cfg.encap_destroy = NULL;
 
 	/* Setup UDP tunnel */
 	setup_udp_tunnel_sock(net, sock, &tnl_cfg);
-- 
cgit v0.10.2


From 002e062e13db10973adb8302f231e48b477c7ccf Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Wed, 16 Nov 2016 10:39:15 +0200
Subject: IB/rxe: Fix handling of erroneous WR

To correctly handle a erroneous WR this fix does the following
1. Make sure the bad WQE causes a user completion event.
2. Call rxe_completer to handle the erred WQE.

Before the fix, when rxe_requester found a bad WQE, it changed its
status to IB_WC_LOC_PROT_ERR and exit with 0 for non RC QPs.

If this was the 1st WQE then there would be no ACK to invoke the
completer and this bad WQE would be stuck in the QP's send-q.

On top of that the requester exiting with 0 caused rxe_do_task to
endlessly invoke rxe_requester, resulting in a soft-lockup attached
below.

In case the WQE was not the 1st and rxe_completer did get a chance to
handle the bad WQE, it did not cause a complete event since the WQE's
IB_SEND_SIGNALED flag was not set.

Setting WQE status to IB_SEND_SIGNALED is subject to IBA spec
version 1.2.1, section 10.7.3.1 Signaled Completions.

NMI watchdog: BUG: soft lockup - CPU#7 stuck for 22s!
[<ffffffffa0590145>] ? rxe_pool_get_index+0x35/0xb0 [rdma_rxe]
[<ffffffffa05952ec>] lookup_mem+0x3c/0xc0 [rdma_rxe]
[<ffffffffa0595534>] copy_data+0x1c4/0x230 [rdma_rxe]
[<ffffffffa058c180>] rxe_requester+0x9d0/0x1100 [rdma_rxe]
[<ffffffff8158e98a>] ? kfree_skbmem+0x5a/0x60
[<ffffffffa05962c9>] rxe_do_task+0x89/0xf0 [rdma_rxe]
[<ffffffffa05963e2>] rxe_run_task+0x12/0x30 [rdma_rxe]
[<ffffffffa059110a>] rxe_post_send+0x41a/0x550 [rdma_rxe]
[<ffffffff811ef922>] ? __kmalloc+0x182/0x200
[<ffffffff816ba512>] ? down_read+0x12/0x40
[<ffffffffa054bd32>] ib_uverbs_post_send+0x532/0x540 [ib_uverbs]
[<ffffffff815f8722>] ? tcp_sendmsg+0x402/0xb80
[<ffffffffa05453dc>] ib_uverbs_write+0x18c/0x3f0 [ib_uverbs]
[<ffffffff81623c2e>] ? inet_recvmsg+0x7e/0xb0
[<ffffffff8158764d>] ? sock_recvmsg+0x3d/0x50
[<ffffffff81215b87>] __vfs_write+0x37/0x140
[<ffffffff81216892>] vfs_write+0xb2/0x1b0
[<ffffffff81217ce5>] SyS_write+0x55/0xc0
[<ffffffff816bc672>] entry_SYSCALL_64_fastpath+0x1a/0xa

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 832846b..22bd963 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -696,7 +696,8 @@ next_wqe:
 						       qp->req.wqe_index);
 			wqe->state = wqe_state_done;
 			wqe->status = IB_WC_SUCCESS;
-			goto complete;
+			__rxe_do_task(&qp->comp.task);
+			return 0;
 		}
 		payload = mtu;
 	}
@@ -745,13 +746,17 @@ err:
 	wqe->status = IB_WC_LOC_PROT_ERR;
 	wqe->state = wqe_state_error;
 
-complete:
-	if (qp_type(qp) != IB_QPT_RC) {
-		while (rxe_completer(qp) == 0)
-			;
-	}
-
-	return 0;
+	/*
+	 * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
+	 * ---------8<---------8<-------------
+	 * ...Note that if a completion error occurs, a Work Completion
+	 * will always be generated, even if the signaling
+	 * indicator requests an Unsignaled Completion.
+	 * ---------8<---------8<-------------
+	 */
+	wqe->wr.send_flags |= IB_SEND_SIGNALED;
+	__rxe_do_task(&qp->comp.task);
+	return -EAGAIN;
 
 exit:
 	return -EAGAIN;
-- 
cgit v0.10.2


From aa75b07b478a774b1432e2df1be5cd8ae834de0f Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Wed, 16 Nov 2016 10:39:17 +0200
Subject: IB/rxe: Clear queue buffer when modifying QP to reset

RXE resets the send-q only once in rxe_qp_init_req() when
QP is created, but when the QP is reused after QP reset, the send-q
holds previous garbage data.

This garbage data wrongly fails CQEs that otherwise
should have completed successfully.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b8036cf..95aaaa2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -522,6 +522,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
 	if (qp->sq.queue) {
 		__rxe_do_task(&qp->comp.task);
 		__rxe_do_task(&qp->req.task);
+		rxe_queue_reset(qp->sq.queue);
 	}
 
 	/* cleanup attributes */
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index 0827425..d14bf49 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -84,6 +84,15 @@ err1:
 	return -EINVAL;
 }
 
+inline void rxe_queue_reset(struct rxe_queue *q)
+{
+	/* queue is comprised from header and the memory
+	 * of the actual queue. See "struct rxe_queue_buf" in rxe_queue.h
+	 * reset only the queue itself and not the management header
+	 */
+	memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf));
+}
+
 struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
 				 int *num_elem,
 				 unsigned int elem_size)
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 239fd60..8c8641c 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -84,6 +84,8 @@ int do_mmap_info(struct rxe_dev *rxe,
 		 size_t buf_size,
 		 struct rxe_mmap_info **ip_p);
 
+void rxe_queue_reset(struct rxe_queue *q);
+
 struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
 				 int *num_elem,
 				 unsigned int elem_size);
-- 
cgit v0.10.2


From 6d931308f55faaef3f30bd0346c47f99528b229d Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Wed, 16 Nov 2016 10:39:18 +0200
Subject: IB/rxe: Update qp state for user query

The method rxe_qp_error() transitions QP to error state
and make sure the QP is drained. It did not though update
the QP state for user's query.

This patch fixes this.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 95aaaa2..c3e60e4 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -574,6 +574,7 @@ void rxe_qp_error(struct rxe_qp *qp)
 {
 	qp->req.state = QP_STATE_ERROR;
 	qp->resp.state = QP_STATE_ERROR;
+	qp->attr.qp_state = IB_QPS_ERR;
 
 	/* drain work and packet queues */
 	rxe_run_task(&qp->resp.task, 1);
-- 
cgit v0.10.2


From 4ff522ea47944ffd3d4d27023ace8bc6a722c834 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 18 Oct 2016 14:04:39 -0700
Subject: iw_cxgb4: set *bad_wr for post_send/post_recv errors

There are a few cases in c4iw_post_send() and c4iw_post_receive()
where *bad_wr is not set when an error is returned.  This can
cause a crash if the application tries to use bad_wr.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index f57deba..5790e1d 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -797,11 +797,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	spin_lock_irqsave(&qhp->lock, flag);
 	if (t4_wq_in_error(&qhp->wq)) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
 		return -EINVAL;
 	}
 	num_wrs = t4_sq_avail(&qhp->wq);
 	if (num_wrs == 0) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
 		return -ENOMEM;
 	}
 	while (wr) {
@@ -934,11 +936,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	spin_lock_irqsave(&qhp->lock, flag);
 	if (t4_wq_in_error(&qhp->wq)) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
 		return -EINVAL;
 	}
 	num_wrs = t4_rq_avail(&qhp->wq);
 	if (num_wrs == 0) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
 		return -ENOMEM;
 	}
 	while (wr) {
-- 
cgit v0.10.2


From 5c6b2aaf9316fd0983c0c999d920306ddc65bd2d Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Thu, 3 Nov 2016 12:09:38 -0700
Subject: iw_cxgb4: invalidate the mr when posting a read_w_inv wr

Also, rearrange things a bit to have a common c4iw_invalidate_mr()
function used everywhere that we need to invalidate.

Fixes: 49b53a93a64a ("iw_cxgb4: add fast-path for small REG_MR operations")
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 867b8cf..19c6477 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -666,18 +666,6 @@ skip_cqe:
 	return ret;
 }
 
-static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
-{
-	struct c4iw_mr *mhp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rhp->lock, flags);
-	mhp = get_mhp(rhp, rkey >> 8);
-	if (mhp)
-		mhp->attr.state = 0;
-	spin_unlock_irqrestore(&rhp->lock, flags);
-}
-
 /*
  * Get one cq entry from c4iw and map it to openib.
  *
@@ -733,7 +721,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
 			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
 			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
-			invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
+			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
 		}
 	} else {
 		switch (CQE_OPCODE(&cqe)) {
@@ -762,7 +750,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 
 			/* Invalidate the MR if the fastreg failed */
 			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
-				invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe));
+				c4iw_invalidate_mr(qhp->rhp,
+						   CQE_WRID_FR_STAG(&cqe));
 			break;
 		default:
 			printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 7e7f79e..4788e1a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -999,6 +999,6 @@ extern int db_coalescing_threshold;
 extern int use_dsgl;
 void c4iw_drain_rq(struct ib_qp *qp);
 void c4iw_drain_sq(struct ib_qp *qp);
-
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
 
 #endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 80e2774..410408f 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -770,3 +770,15 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
 	kfree(mhp);
 	return 0;
 }
+
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
+{
+	struct c4iw_mr *mhp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rhp->lock, flags);
+	mhp = get_mhp(rhp, rkey >> 8);
+	if (mhp)
+		mhp->attr.state = 0;
+	spin_unlock_irqrestore(&rhp->lock, flags);
+}
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 5790e1d..b7ac97b 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -706,12 +706,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
 	return 0;
 }
 
-static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe,
-			  struct ib_send_wr *wr, u8 *len16)
+static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 {
-	struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8);
-
-	mhp->attr.state = 0;
 	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
 	wqe->inv.r2 = 0;
 	*len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
@@ -842,10 +838,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		case IB_WR_RDMA_READ_WITH_INV:
 			fw_opcode = FW_RI_RDMA_READ_WR;
 			swsqe->opcode = FW_RI_READ_REQ;
-			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
+				c4iw_invalidate_mr(qhp->rhp,
+						   wr->sg_list[0].lkey);
 				fw_flags = FW_RI_RDMA_READ_INVALIDATE;
-			else
+			} else {
 				fw_flags = 0;
+			}
 			err = build_rdma_read(wqe, wr, &len16);
 			if (err)
 				break;
@@ -878,7 +877,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
 			fw_opcode = FW_RI_INV_LSTAG_WR;
 			swsqe->opcode = FW_RI_LOCAL_INV;
-			err = build_inv_stag(qhp->rhp, wqe, wr, &len16);
+			err = build_inv_stag(wqe, wr, &len16);
+			c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
 			break;
 		default:
 			PDBG("%s post of type=%d TBD!\n", __func__,
-- 
cgit v0.10.2