summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/multicast.c13
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c6
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c2
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c92
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h1
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c132
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h4
-rw-r--r--drivers/infiniband/hw/hfi1/init.c3
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c12
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c12
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c5
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c8
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c26
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c23
-rw-r--r--drivers/infiniband/hw/mlx4/main.c3
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c14
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c37
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c22
-rw-r--r--drivers/infiniband/hw/mlx5/main.c17
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c6
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h1
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c13
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c25
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c55
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c57
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c11
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c16
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c9
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c2
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c23
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h2
40 files changed, 426 insertions, 258 deletions
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 3a3c5d7..51c79b2 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -106,7 +106,6 @@ struct mcast_group {
atomic_t refcount;
enum mcast_group_state state;
struct ib_sa_query *query;
- int query_id;
u16 pkey_index;
u8 leave_state;
int retries;
@@ -340,11 +339,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member)
member->multicast.comp_mask,
3000, GFP_KERNEL, join_handler, group,
&group->query);
- if (ret >= 0) {
- group->query_id = ret;
- ret = 0;
- }
- return ret;
+ return (ret > 0) ? 0 : ret;
}
static int send_leave(struct mcast_group *group, u8 leave_state)
@@ -364,11 +359,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state)
IB_SA_MCMEMBER_REC_JOIN_STATE,
3000, GFP_KERNEL, leave_handler,
group, &group->query);
- if (ret >= 0) {
- group->query_id = ret;
- ret = 0;
- }
- return ret;
+ return (ret > 0) ? 0 : ret;
}
static void join_group(struct mcast_group *group, struct mcast_member *member,
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index b6a953a..80f9889 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -333,6 +333,8 @@ static void remove_ep_tid(struct c4iw_ep *ep)
spin_lock_irqsave(&ep->com.dev->lock, flags);
_remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+ if (idr_is_empty(&ep->com.dev->hwtid_idr))
+ wake_up(&ep->com.dev->wait);
spin_unlock_irqrestore(&ep->com.dev->lock, flags);
}
@@ -2117,8 +2119,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
}
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
n, pdev, rt_tos2priority(tos));
- if (!ep->l2t)
+ if (!ep->l2t) {
+ dev_put(pdev);
goto out;
+ }
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 071d733..3c4b212 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
static void c4iw_dealloc(struct uld_ctx *ctx)
{
c4iw_rdev_close(&ctx->dev->rdev);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
idr_destroy(&ctx->dev->cqidr);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
idr_destroy(&ctx->dev->qpidr);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
idr_destroy(&ctx->dev->mmidr);
+ wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
idr_destroy(&ctx->dev->hwtid_idr);
idr_destroy(&ctx->dev->stid_idr);
idr_destroy(&ctx->dev->atid_idr);
@@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
mutex_init(&devp->rdev.stats.lock);
mutex_init(&devp->db_mutex);
INIT_LIST_HEAD(&devp->db_fc_list);
+ init_waitqueue_head(&devp->wait);
devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
if (c4iw_debugfs_root) {
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index aa47e0a..4b83b84 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -263,6 +263,7 @@ struct c4iw_dev {
struct idr stid_idr;
struct list_head db_fc_list;
u32 avail_ird;
+ wait_queue_head_t wait;
};
static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index edb1172..6904352 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -683,7 +683,7 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-void _free_qp(struct kref *kref)
+static void _free_qp(struct kref *kref)
{
struct c4iw_qp *qhp;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b32638d..cc38004 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -9490,6 +9490,78 @@ static void init_lcb(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00);
}
+/*
+ * Perform a test read on the QSFP. Return 0 on success, -ERRNO
+ * on error.
+ */
+static int test_qsfp_read(struct hfi1_pportdata *ppd)
+{
+ int ret;
+ u8 status;
+
+ /* report success if not a QSFP */
+ if (ppd->port_type != PORT_TYPE_QSFP)
+ return 0;
+
+ /* read byte 2, the status byte */
+ ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1);
+ if (ret < 0)
+ return ret;
+ if (ret != 1)
+ return -EIO;
+
+ return 0; /* success */
+}
+
+/*
+ * Values for QSFP retry.
+ *
+ * Give up after 10s (20 x 500ms). The overall timeout was empirically
+ * arrived at from experience on a large cluster.
+ */
+#define MAX_QSFP_RETRIES 20
+#define QSFP_RETRY_WAIT 500 /* msec */
+
+/*
+ * Try a QSFP read. If it fails, schedule a retry for later.
+ * Called on first link activation after driver load.
+ */
+static void try_start_link(struct hfi1_pportdata *ppd)
+{
+ if (test_qsfp_read(ppd)) {
+ /* read failed */
+ if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) {
+ dd_dev_err(ppd->dd, "QSFP not responding, giving up\n");
+ return;
+ }
+ dd_dev_info(ppd->dd,
+ "QSFP not responding, waiting and retrying %d\n",
+ (int)ppd->qsfp_retry_count);
+ ppd->qsfp_retry_count++;
+ queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+ msecs_to_jiffies(QSFP_RETRY_WAIT));
+ return;
+ }
+ ppd->qsfp_retry_count = 0;
+
+ /*
+ * Tune the SerDes to a ballpark setting for optimal signal and bit
+ * error rate. Needs to be done before starting the link.
+ */
+ tune_serdes(ppd);
+ start_link(ppd);
+}
+
+/*
+ * Workqueue function to start the link after a delay.
+ */
+void handle_start_link(struct work_struct *work)
+{
+ struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+ start_link_work.work);
+ try_start_link(ppd);
+}
+
int bringup_serdes(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
@@ -9525,14 +9597,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
set_qsfp_int_n(ppd, 1);
}
- /*
- * Tune the SerDes to a ballpark setting for
- * optimal signal and bit error rate
- * Needs to be done before starting the link
- */
- tune_serdes(ppd);
-
- return start_link(ppd);
+ try_start_link(ppd);
+ return 0;
}
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
@@ -9549,6 +9615,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
ppd->driver_link_ready = 0;
ppd->link_enabled = 0;
+ ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */
+ flush_delayed_work(&ppd->start_link_work);
+ cancel_delayed_work_sync(&ppd->start_link_work);
+
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
@@ -12865,7 +12935,7 @@ fail:
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
- int num_kernel_contexts;
+ unsigned long num_kernel_contexts;
int total_contexts;
int ret;
unsigned ngroups;
@@ -12894,9 +12964,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/
if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
dd_dev_err(dd,
- "Reducing # kernel rcv contexts to: %d, from %d\n",
+ "Reducing # kernel rcv contexts to: %d, from %lu\n",
(int)(dd->chip_send_contexts - num_vls - 1),
- (int)num_kernel_contexts);
+ num_kernel_contexts);
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
}
/*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index ed11107..e295737 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -706,6 +706,7 @@ void handle_link_up(struct work_struct *work);
void handle_link_down(struct work_struct *work);
void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
+void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index a49cc88..5e9be16 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -59,6 +59,40 @@
static struct dentry *hfi1_dbg_root;
+/* wrappers to enforce srcu in seq file */
+static ssize_t hfi1_seq_read(
+ struct file *file,
+ char __user *buf,
+ size_t size,
+ loff_t *ppos)
+{
+ struct dentry *d = file->f_path.dentry;
+ int srcu_idx;
+ ssize_t r;
+
+ r = debugfs_use_file_start(d, &srcu_idx);
+ if (likely(!r))
+ r = seq_read(file, buf, size, ppos);
+ debugfs_use_file_finish(srcu_idx);
+ return r;
+}
+
+static loff_t hfi1_seq_lseek(
+ struct file *file,
+ loff_t offset,
+ int whence)
+{
+ struct dentry *d = file->f_path.dentry;
+ int srcu_idx;
+ loff_t r;
+
+ r = debugfs_use_file_start(d, &srcu_idx);
+ if (likely(!r))
+ r = seq_lseek(file, offset, whence);
+ debugfs_use_file_finish(srcu_idx);
+ return r;
+}
+
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
@@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \
static const struct file_operations _##name##_file_ops = { \
.owner = THIS_MODULE, \
.open = _##name##_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
+ .read = hfi1_seq_read, \
+ .llseek = hfi1_seq_lseek, \
.release = seq_release \
}
@@ -105,11 +139,9 @@ do { \
DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
struct hfi1_opcode_stats_perctx *opstats;
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
@@ -126,9 +158,7 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _opcode_stats_seq_show(struct seq_file *s, void *v)
@@ -285,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats)
DEBUGFS_FILE_OPS(qp_stats);
static void *_sdes_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
struct hfi1_ibdev *ibd;
struct hfi1_devdata *dd;
- rcu_read_lock();
ibd = (struct hfi1_ibdev *)s->private;
dd = dd_from_dev(ibd);
if (!dd->per_sdma || *pos >= dd->num_sdma)
@@ -310,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _sdes_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _sdes_seq_show(struct seq_file *s, void *v)
@@ -339,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
- rcu_read_unlock();
return rval;
}
@@ -356,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
- rcu_read_unlock();
return rval;
}
@@ -383,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
- rcu_read_unlock();
return rval;
}
@@ -400,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
struct hfi1_pportdata *ppd;
ssize_t rval;
- rcu_read_lock();
ppd = private2ppd(file);
avail = hfi1_read_portcntrs(ppd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
- rcu_read_unlock();
return rval;
}
@@ -434,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
int used;
int i;
- rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
size = PAGE_SIZE;
used = 0;
tmp = kmalloc(size, GFP_KERNEL);
- if (!tmp) {
- rcu_read_unlock();
+ if (!tmp)
return -ENOMEM;
- }
scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
used += scnprintf(tmp + used, size - used,
@@ -470,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
- rcu_read_unlock();
kfree(tmp);
return ret;
}
@@ -486,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
u64 scratch0;
u64 clear;
- rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
buff = kmalloc(count + 1, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto do_return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
@@ -527,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
do_free:
kfree(buff);
- do_return:
- rcu_read_unlock();
return ret;
}
@@ -542,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
char *tmp;
int ret;
- rcu_read_lock();
ppd = private2ppd(file);
tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!tmp) {
- rcu_read_unlock();
+ if (!tmp)
return -ENOMEM;
- }
ret = qsfp_dump(ppd, tmp, PAGE_SIZE);
if (ret > 0)
ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
- rcu_read_unlock();
kfree(tmp);
return ret;
}
@@ -569,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
int offset;
int total_written;
- rcu_read_lock();
ppd = private2ppd(file);
/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -577,16 +581,12 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
offset = *ppos & 0xffff;
/* explicitly reject invalid address 0 to catch cp and cat */
- if (i2c_addr == 0) {
- ret = -EINVAL;
- goto _return;
- }
+ if (i2c_addr == 0)
+ return -EINVAL;
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
@@ -606,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -636,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
int offset;
int total_read;
- rcu_read_lock();
ppd = private2ppd(file);
/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -644,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
offset = *ppos & 0xffff;
/* explicitly reject invalid address 0 to catch cp and cat */
- if (i2c_addr == 0) {
- ret = -EINVAL;
- goto _return;
- }
+ if (i2c_addr == 0)
+ return -EINVAL;
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
if (total_read < 0) {
@@ -673,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -701,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
int ret;
int total_written;
- rcu_read_lock();
- if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
- ret = -EINVAL;
- goto _return;
- }
+ if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */
+ return -EINVAL;
ppd = private2ppd(file);
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
ret = -EFAULT;
goto _free;
}
-
total_written = qsfp_write(ppd, target, *ppos, buff, count);
if (total_written < 0) {
ret = total_written;
@@ -733,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -761,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
int ret;
int total_read;
- rcu_read_lock();
if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
ret = -EINVAL;
goto _return;
@@ -794,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
_free:
kfree(buff);
_return:
- rcu_read_unlock();
return ret;
}
@@ -1010,7 +991,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
ibd->hfi1_ibdev_dbg = NULL;
- synchronize_rcu();
}
/*
@@ -1035,9 +1015,7 @@ static const char * const hfi1_statnames[] = {
};
static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
@@ -1055,9 +1033,7 @@ static void *_driver_stats_names_seq_next(
}
static void _driver_stats_names_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _driver_stats_names_seq_show(struct seq_file *s, void *v)
@@ -1073,9 +1049,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names)
DEBUGFS_FILE_OPS(driver_stats_names);
static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
@@ -1090,9 +1064,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _driver_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static u64 hfi1_sps_ints(void)
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index a021e66..325ec21 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -605,6 +605,7 @@ struct hfi1_pportdata {
struct work_struct freeze_work;
struct work_struct link_downgrade_work;
struct work_struct link_bounce_work;
+ struct delayed_work start_link_work;
/* host link state variables */
struct mutex hls_lock;
u32 host_link_state;
@@ -659,6 +660,7 @@ struct hfi1_pportdata {
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
u8 last_pstate; /* info only */
+ u8 qsfp_retry_count;
/* placeholders for IB MAD packet settings */
u8 overrun_threshold;
@@ -1804,7 +1806,7 @@ extern unsigned int hfi1_max_mtu;
extern unsigned int hfi1_cu;
extern unsigned int user_credit_return_threshold;
extern int num_user_contexts;
-extern unsigned n_krcvqs;
+extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
extern uint kdeth_qp;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index b793545..384b43d 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
/* computed based on above array */
-unsigned n_krcvqs;
+unsigned long n_krcvqs;
static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
@@ -500,6 +500,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
INIT_WORK(&ppd->sma_message_work, handle_sma_message);
INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+ INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 39e42c3..7ffc14f 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -2604,7 +2604,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
u8 lq, num_vls;
u8 res_lli, res_ler;
u64 port_mask;
- unsigned long port_num;
+ u8 port_num;
unsigned long vl;
u32 vl_select_mask;
int vfi;
@@ -2638,9 +2638,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
- if ((u8)port_num != port) {
+ if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
@@ -2842,7 +2842,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3015,7 +3015,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3252,7 +3252,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 8c25e1b..3a1ef30 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -771,6 +771,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
read_extra_bytes(pbuf, from, to_fill);
from += to_fill;
nbytes -= to_fill;
+ /* may not be enough valid bytes left to align */
+ if (extra > nbytes)
+ extra = nbytes;
/* ...now write carry */
dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
@@ -798,6 +801,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
read_low_bytes(pbuf, from, extra);
from += extra;
nbytes -= extra;
+ /*
+ * If no bytes are left, return early - we are done.
+ * NOTE: This short-circuit is *required* because
+ * "extra" may have been reduced in size and "from"
+ * is not aligned, as required when leaving this
+ * if block.
+ */
+ if (nbytes == 0)
+ return;
}
/* at this point, from is QW aligned */
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0ecf279..1694037 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_HCRC_LOWER_SHIFT 24
#define KDETH_HCRC_LOWER_MASK 0xff
+#define AHG_KDETH_INTR_SHIFT 12
+
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -1480,7 +1482,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
/* Clear KDETH.SH on last packet */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
- INTR) >> 16);
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT);
val &= cpu_to_le16(~(1U << 13));
AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
} else {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 3ee0cad..0c92a40 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp,
info.dont_send_fin = false;
if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
info.reset_tcp_conn = true;
+ iwqp->hw_iwarp_state = state;
i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 0cbbe40..445e230 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = {
.notifier_call = i40iw_net_event
};
-static int i40iw_notifiers_registered;
+static atomic_t i40iw_notifiers_registered;
/**
* i40iw_find_i40e_handler - find a handler given a client info
@@ -1342,12 +1342,11 @@ exit:
*/
static void i40iw_register_notifiers(void)
{
- if (!i40iw_notifiers_registered) {
+ if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
register_inetaddr_notifier(&i40iw_inetaddr_notifier);
register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
register_netevent_notifier(&i40iw_net_notifier);
}
- i40iw_notifiers_registered++;
}
/**
@@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del
i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
/* fallthrough */
case INET_NOTIFIER:
- if (i40iw_notifiers_registered > 0) {
- i40iw_notifiers_registered--;
+ if (!atomic_dec_return(&i40iw_notifiers_registered)) {
unregister_netevent_notifier(&i40iw_net_notifier);
unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 006db64..5df63da 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -687,12 +687,6 @@ repoll:
is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR;
- if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
- is_send)) {
- pr_warn("Completion for NOP opcode detected!\n");
- return -EAGAIN;
- }
-
/* Resize CQ in progress */
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
if (cq->resize_buf) {
@@ -718,12 +712,6 @@ repoll:
*/
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->vlan_my_qpn));
- if (unlikely(!mqp)) {
- pr_warn("CQ %06x with entry for unknown QPN %06x\n",
- cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
- return -EAGAIN;
- }
-
*cur_qp = to_mibqp(mqp);
}
@@ -736,11 +724,6 @@ repoll:
/* SRQ is also in the radix tree */
msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
srq_num);
- if (unlikely(!msrq)) {
- pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
- cq->mcq.cqn, srq_num);
- return -EAGAIN;
- }
}
if (is_send) {
@@ -891,7 +874,6 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
struct mlx4_ib_qp *cur_qp = NULL;
unsigned long flags;
int npolled;
- int err = 0;
struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device);
spin_lock_irqsave(&cq->lock, flags);
@@ -901,8 +883,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
}
for (npolled = 0; npolled < num_entries; ++npolled) {
- err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
- if (err)
+ if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled))
break;
}
@@ -911,10 +892,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
out:
spin_unlock_irqrestore(&cq->lock, flags);
- if (err == 0 || err == -EAGAIN)
- return npolled;
- else
- return err;
+ return npolled;
}
int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 9c2e53d..0f21c3a 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work)
/* Generate GUID changed event */
if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+ if (mlx4_is_master(dev->dev)) {
+ union ib_gid gid;
+ int err = 0;
+
+ if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
+ err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
+ else
+ gid.global.subnet_prefix =
+ eqe->event.port_mgmt_change.params.port_info.gid_prefix;
+ if (err) {
+ pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
+ port, err);
+ } else {
+ pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
+ port,
+ (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
+ be64_to_cpu(gid.global.subnet_prefix));
+ atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
+ be64_to_cpu(gid.global.subnet_prefix));
+ }
+ }
mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
/*if master, notify all slaves*/
if (mlx4_is_master(dev->dev))
@@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
if (err)
goto demux_err;
dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+ atomic64_set(&dev->sriov.demux[i].subnet_prefix,
+ be64_to_cpu(gid.global.subnet_prefix));
err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
&dev->sriov.sqps[i]);
if (err)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2af44c2..87ba9bc 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2202,6 +2202,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
bool per_port = !!(ibdev->dev->caps.flags2 &
MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
+ if (mlx4_is_slave(ibdev->dev))
+ return 0;
+
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
/* i == 1 means we are building port counters */
if (i && !per_port)
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 8f7ad07..097bfcc 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group)
if (!group->members[i])
leave_state |= (1 << i);
- return leave_state & (group->rec.scope_join_state & 7);
+ return leave_state & (group->rec.scope_join_state & 0xf);
}
static int join_group(struct mcast_group *group, int slave, u8 join_mask)
@@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
} else
mcg_warn_group(group, "DRIVER BUG\n");
} else if (group->state == MCAST_LEAVE_SENT) {
- if (group->rec.scope_join_state & 7)
- group->rec.scope_join_state &= 0xf8;
+ if (group->rec.scope_join_state & 0xf)
+ group->rec.scope_join_state &= 0xf0;
group->state = MCAST_IDLE;
mutex_unlock(&group->lock);
if (release_group(group, 1))
@@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
static int handle_join_req(struct mcast_group *group, u8 join_mask,
struct mcast_req *req)
{
- u8 group_join_state = group->rec.scope_join_state & 7;
+ u8 group_join_state = group->rec.scope_join_state & 0xf;
int ref = 0;
u16 status;
struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
@@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
u8 cur_join_state;
resp_join_state = ((struct ib_sa_mcmember_data *)
- group->response_sa_mad.data)->scope_join_state & 7;
- cur_join_state = group->rec.scope_join_state & 7;
+ group->response_sa_mad.data)->scope_join_state & 0xf;
+ cur_join_state = group->rec.scope_join_state & 0xf;
if (method == IB_MGMT_METHOD_GET_RESP) {
/* successfull join */
@@ -710,7 +710,7 @@ process_requests:
req = list_first_entry(&group->pending_list, struct mcast_req,
group_list);
sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
- req_join_state = sa_data->scope_join_state & 0x7;
+ req_join_state = sa_data->scope_join_state & 0xf;
/* For a leave request, we will immediately answer the VF, and
* update our internal counters. The actual leave will be sent
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7c5832e..686ab48 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx {
struct workqueue_struct *wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
- __be64 subnet_prefix;
+ atomic64_t subnet_prefix;
__be64 guid_cache[128];
struct mlx4_ib_dev *dev;
/* the following lock protects both mcg_table and mcg_mgid0_list */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 768085f..7fb9629 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2493,24 +2493,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
- if (is_eth)
+ if (is_eth) {
memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
- else {
- if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
- /* When multi-function is enabled, the ib_core gid
- * indexes don't necessarily match the hw ones, so
- * we must use our own cache */
- sqp->ud_header.grh.source_gid.global.subnet_prefix =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- subnet_prefix;
- sqp->ud_header.grh.source_gid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
- } else
- ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index,
- &sqp->ud_header.grh.source_gid, NULL);
+ } else {
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache
+ */
+ sqp->ud_header.grh.source_gid.global.subnet_prefix =
+ cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
+ demux[sqp->qp.port - 1].
+ subnet_prefix)));
+ sqp->ud_header.grh.source_gid.global.interface_id =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ guid_cache[ah->av.ib.gid_index];
+ } else {
+ ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index,
+ &sqp->ud_header.grh.source_gid, NULL);
+ }
}
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 308a358..e4fac929 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -553,12 +553,6 @@ repoll:
* from the table.
*/
mqp = __mlx5_qp_lookup(dev->mdev, qpn);
- if (unlikely(!mqp)) {
- mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
- cq->mcq.cqn, qpn);
- return -EINVAL;
- }
-
*cur_qp = to_mibqp(mqp);
}
@@ -619,13 +613,6 @@ repoll:
read_lock(&dev->mdev->priv.mkey_table.lock);
mmkey = __mlx5_mr_lookup(dev->mdev,
mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
- if (unlikely(!mmkey)) {
- read_unlock(&dev->mdev->priv.mkey_table.lock);
- mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
- cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
- return -EINVAL;
- }
-
mr = to_mibmr(mmkey);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
@@ -676,7 +663,6 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
unsigned long flags;
int soft_polled = 0;
int npolled;
- int err = 0;
spin_lock_irqsave(&cq->lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -688,8 +674,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
soft_polled = poll_soft_wc(cq, num_entries, wc);
for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
- err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
- if (err)
+ if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
break;
}
@@ -698,10 +683,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
out:
spin_unlock_irqrestore(&cq->lock, flags);
- if (err == 0 || err == -EAGAIN)
- return soft_polled + npolled;
- else
- return err;
+ return soft_polled + npolled;
}
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 1b4094b..e19537c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -288,7 +288,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
- return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+ if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
+ return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+ return 0;
}
enum {
@@ -1428,6 +1430,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
dmac_47_16),
ib_spec->eth.val.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
vlan_tag, 1);
@@ -1849,6 +1858,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
int domain)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
struct mlx5_ib_flow_handler *handler = NULL;
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio;
@@ -1875,7 +1885,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
}
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+ if (mqp->flags & MLX5_IB_QP_RSS)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 40df2cc..996b54e 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -71,7 +71,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
addr = addr >> page_shift;
tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, sizeof(tmp));
+ m = find_first_bit(&tmp, BITS_PER_LONG);
skip = 1 << m;
mask = skip - 1;
i = 0;
@@ -81,7 +81,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
for (k = 0; k < len; k++) {
if (!(i & mask)) {
tmp = (unsigned long)pfn;
- m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp)));
+ m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
skip = 1 << m;
mask = skip - 1;
base = pfn;
@@ -89,7 +89,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
} else {
if (base + p != pfn) {
tmp = (unsigned long)p;
- m = find_first_bit(&tmp, sizeof(tmp));
+ m = find_first_bit(&tmp, BITS_PER_LONG);
skip = 1 << m;
mask = skip - 1;
base = pfn;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 372385d..95146f4 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -402,6 +402,7 @@ enum mlx5_ib_qp_flags {
/* QP uses 1 as its source QP number */
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
+ MLX5_IB_QP_RSS = 1 << 8,
};
struct mlx5_umr_wr {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0dd7d93..affc3f6 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1449,6 +1449,7 @@ create_tir:
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
+ qp->flags |= MLX5_IB_QP_RSS;
return 0;
err:
@@ -3658,12 +3659,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct ib_send_wr *wr, unsigned *idx,
int *size, int nreq)
{
- int err = 0;
-
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
- err = -ENOMEM;
- return err;
- }
+ if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+ return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
*seg = mlx5_get_send_wqe(qp, *idx);
@@ -3679,7 +3676,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
- return err;
+ return 0;
}
static void finish_wqe(struct mlx5_ib_qp *qp,
@@ -3758,7 +3755,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
num_sge = wr->num_sge;
if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
+ err = -EINVAL;
*bad_wr = wr;
goto out;
}
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 80c4b6b..46b6497 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr)
{
rvt_deinit_mregion(&mr->mr);
rvt_free_lkey(&mr->mr);
- vfree(mr);
+ kfree(mr);
}
/**
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 55f0e8f..ddd5927 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -362,15 +362,34 @@ static int __init rxe_module_init(void)
return err;
}
- err = rxe_net_init();
+ err = rxe_net_ipv4_init();
if (err) {
- pr_err("rxe: unable to init\n");
+ pr_err("rxe: unable to init ipv4 tunnel\n");
rxe_cache_exit();
- return err;
+ goto exit;
+ }
+
+ err = rxe_net_ipv6_init();
+ if (err) {
+ pr_err("rxe: unable to init ipv6 tunnel\n");
+ rxe_cache_exit();
+ goto exit;
}
+
+ err = register_netdevice_notifier(&rxe_net_notifier);
+ if (err) {
+ pr_err("rxe: Failed to rigister netdev notifier\n");
+ goto exit;
+ }
+
pr_info("rxe: loaded\n");
return 0;
+
+exit:
+ rxe_release_udp_tunnel(recv_sockets.sk4);
+ rxe_release_udp_tunnel(recv_sockets.sk6);
+ return err;
}
static void __exit rxe_module_exit(void)
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 36f67de..1c59ef2 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -689,7 +689,14 @@ int rxe_completer(void *arg)
qp->req.need_retry = 1;
rxe_run_task(&qp->req.task, 1);
}
+
+ if (pkt) {
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ }
+
goto exit;
+
} else {
wqe->status = IB_WC_RETRY_EXC_ERR;
state = COMPST_ERROR;
@@ -716,6 +723,12 @@ int rxe_completer(void *arg)
case COMPST_ERROR:
do_complete(qp, wqe);
rxe_qp_error(qp);
+
+ if (pkt) {
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ }
+
goto exit;
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 0b8d2ea..eedf2f1 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
return sock;
}
-static void rxe_release_udp_tunnel(struct socket *sk)
+void rxe_release_udp_tunnel(struct socket *sk)
{
- udp_tunnel_sock_release(sk);
+ if (sk)
+ udp_tunnel_sock_release(sk);
}
static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
@@ -658,51 +659,45 @@ out:
return NOTIFY_OK;
}
-static struct notifier_block rxe_net_notifier = {
+struct notifier_block rxe_net_notifier = {
.notifier_call = rxe_notify,
};
-int rxe_net_init(void)
+int rxe_net_ipv4_init(void)
{
- int err;
-
spin_lock_init(&dev_list_lock);
- recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
- htons(ROCE_V2_UDP_DPORT), true);
- if (IS_ERR(recv_sockets.sk6)) {
- recv_sockets.sk6 = NULL;
- pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
- return -1;
- }
-
recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
- htons(ROCE_V2_UDP_DPORT), false);
+ htons(ROCE_V2_UDP_DPORT), false);
if (IS_ERR(recv_sockets.sk4)) {
- rxe_release_udp_tunnel(recv_sockets.sk6);
recv_sockets.sk4 = NULL;
- recv_sockets.sk6 = NULL;
pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
return -1;
}
- err = register_netdevice_notifier(&rxe_net_notifier);
- if (err) {
- rxe_release_udp_tunnel(recv_sockets.sk6);
- rxe_release_udp_tunnel(recv_sockets.sk4);
- pr_err("rxe: Failed to rigister netdev notifier\n");
- }
-
- return err;
+ return 0;
}
-void rxe_net_exit(void)
+int rxe_net_ipv6_init(void)
{
- if (recv_sockets.sk6)
- rxe_release_udp_tunnel(recv_sockets.sk6);
+#if IS_ENABLED(CONFIG_IPV6)
- if (recv_sockets.sk4)
- rxe_release_udp_tunnel(recv_sockets.sk4);
+ spin_lock_init(&dev_list_lock);
+ recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
+ htons(ROCE_V2_UDP_DPORT), true);
+ if (IS_ERR(recv_sockets.sk6)) {
+ recv_sockets.sk6 = NULL;
+ pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
+ return -1;
+ }
+#endif
+ return 0;
+}
+
+void rxe_net_exit(void)
+{
+ rxe_release_udp_tunnel(recv_sockets.sk6);
+ rxe_release_udp_tunnel(recv_sockets.sk4);
unregister_netdevice_notifier(&rxe_net_notifier);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 7b06f76..0daf7f0 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -44,10 +44,13 @@ struct rxe_recv_sockets {
};
extern struct rxe_recv_sockets recv_sockets;
+extern struct notifier_block rxe_net_notifier;
+void rxe_release_udp_tunnel(struct socket *sk);
struct rxe_dev *rxe_net_add(struct net_device *ndev);
-int rxe_net_init(void);
+int rxe_net_ipv4_init(void);
+int rxe_net_ipv6_init(void);
void rxe_net_exit(void);
#endif /* RXE_NET_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 3d464c2..144d2f1 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
* make a copy of the skb to post to the next qp
*/
skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
- skb_clone(skb, GFP_KERNEL) : NULL;
+ skb_clone(skb, GFP_ATOMIC) : NULL;
pkt->qp = qp;
rxe_add_ref(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 33b2d9d..13a848a 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
static void update_wqe_state(struct rxe_qp *qp,
- struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt,
- enum wqe_state *prev_state)
+ struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt)
{
- enum wqe_state prev_state_ = wqe->state;
-
if (pkt->mask & RXE_END_MASK) {
if (qp_type(qp) == IB_QPT_RC)
wqe->state = wqe_state_pending;
} else {
wqe->state = wqe_state_processing;
}
-
- *prev_state = prev_state_;
}
-static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt, int payload)
+static void update_wqe_psn(struct rxe_qp *qp,
+ struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt,
+ int payload)
{
/* number of packets left to send including current one */
int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
else
qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
+}
- qp->req.opcode = pkt->opcode;
+static void save_state(struct rxe_send_wqe *wqe,
+ struct rxe_qp *qp,
+ struct rxe_send_wqe *rollback_wqe,
+ struct rxe_qp *rollback_qp)
+{
+ rollback_wqe->state = wqe->state;
+ rollback_wqe->first_psn = wqe->first_psn;
+ rollback_wqe->last_psn = wqe->last_psn;
+ rollback_qp->req.psn = qp->req.psn;
+}
+static void rollback_state(struct rxe_send_wqe *wqe,
+ struct rxe_qp *qp,
+ struct rxe_send_wqe *rollback_wqe,
+ struct rxe_qp *rollback_qp)
+{
+ wqe->state = rollback_wqe->state;
+ wqe->first_psn = rollback_wqe->first_psn;
+ wqe->last_psn = rollback_wqe->last_psn;
+ qp->req.psn = rollback_qp->req.psn;
+}
+
+static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt, int payload)
+{
+ qp->req.opcode = pkt->opcode;
if (pkt->mask & RXE_END_MASK)
qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
@@ -571,7 +593,8 @@ int rxe_requester(void *arg)
int mtu;
int opcode;
int ret;
- enum wqe_state prev_state;
+ struct rxe_qp rollback_qp;
+ struct rxe_send_wqe rollback_wqe;
next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -688,13 +711,21 @@ next_wqe:
goto err;
}
- update_wqe_state(qp, wqe, &pkt, &prev_state);
+ /*
+ * To prevent a race on wqe access between requester and completer,
+ * wqe members state and psn need to be set before calling
+ * rxe_xmit_packet().
+ * Otherwise, completer might initiate an unjustified retry flow.
+ */
+ save_state(wqe, qp, &rollback_wqe, &rollback_qp);
+ update_wqe_state(qp, wqe, &pkt);
+ update_wqe_psn(qp, wqe, &pkt, payload);
ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
if (ret) {
qp->need_req_skb = 1;
kfree_skb(skb);
- wqe->state = prev_state;
+ rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);
if (ret == -EAGAIN) {
rxe_run_task(&qp->req.task, 1);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index ebb03b4..3e0f0f2 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -972,11 +972,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
free_rd_atomic_resource(qp, res);
rxe_advance_resp_resource(qp);
+ memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+
res->type = RXE_ATOMIC_MASK;
res->atomic.skb = skb;
- res->first_psn = qp->resp.psn;
- res->last_psn = qp->resp.psn;
- res->cur_psn = qp->resp.psn;
+ res->first_psn = ack_pkt.psn;
+ res->last_psn = ack_pkt.psn;
+ res->cur_psn = ack_pkt.psn;
rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
if (rc) {
@@ -1116,8 +1118,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
rc = RESPST_CLEANUP;
goto out;
}
- bth_set_psn(SKB_TO_PKT(skb_copy),
- qp->resp.psn - 1);
+
/* Resend the result. */
rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
pkt, skb_copy);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 4f7d9b4..9dbfcc0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -478,6 +478,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_ah *address, u32 qpn);
void ipoib_reap_ah(struct work_struct *work);
+struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 951d9ab..4ad297d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1318,6 +1318,8 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
}
}
+#define QPN_AND_OPTIONS_OFFSET 4
+
static void ipoib_cm_tx_start(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
@@ -1326,6 +1328,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ipoib_neigh *neigh;
struct ipoib_cm_tx *p;
unsigned long flags;
+ struct ipoib_path *path;
int ret;
struct ib_sa_path_rec pathrec;
@@ -1338,7 +1341,19 @@ static void ipoib_cm_tx_start(struct work_struct *work)
p = list_entry(priv->cm.start_list.next, typeof(*p), list);
list_del_init(&p->list);
neigh = p->neigh;
+
qpn = IPOIB_QPN(neigh->daddr);
+ /*
+ * As long as the search is with these 2 locks,
+ * path existence indicates its validity.
+ */
+ path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+ if (!path) {
+ pr_info("%s ignore not valid path %pI6\n",
+ __func__,
+ neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+ goto free_neigh;
+ }
memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1350,6 +1365,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
spin_lock_irqsave(&priv->lock, flags);
if (ret) {
+free_neigh:
neigh = p->neigh;
if (neigh) {
neigh->cm = NULL;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index dc6d241..be11d5d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
}
if (level == IPOIB_FLUSH_LIGHT) {
+ int oper_up;
ipoib_mark_paths_invalid(dev);
+ /* Set IPoIB operation as down to prevent races between:
+ * the flush flow which leaves MCG and on the fly joins
+ * which can happen during that time. mcast restart task
+ * should deal with join requests we missed.
+ */
+ oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
ipoib_mcast_dev_flush(dev);
+ if (oper_up)
+ set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
ipoib_flush_ah(dev);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 74bcaa0..cc1c1b0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -485,7 +485,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
return -EINVAL;
}
-static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
+struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->path_tree.rb_node;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 7914c14..cae9bbc 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -403,6 +403,7 @@ isert_init_conn(struct isert_conn *isert_conn)
INIT_LIST_HEAD(&isert_conn->node);
init_completion(&isert_conn->login_comp);
init_completion(&isert_conn->login_req_comp);
+ init_waitqueue_head(&isert_conn->rem_wait);
kref_init(&isert_conn->kref);
mutex_init(&isert_conn->mutex);
INIT_WORK(&isert_conn->release_work, isert_release_work);
@@ -578,7 +579,8 @@ isert_connect_release(struct isert_conn *isert_conn)
BUG_ON(!device);
isert_free_rx_descriptors(isert_conn);
- if (isert_conn->cm_id)
+ if (isert_conn->cm_id &&
+ !isert_conn->dev_removed)
rdma_destroy_id(isert_conn->cm_id);
if (isert_conn->qp) {
@@ -593,7 +595,10 @@ isert_connect_release(struct isert_conn *isert_conn)
isert_device_put(device);
- kfree(isert_conn);
+ if (isert_conn->dev_removed)
+ wake_up_interruptible(&isert_conn->rem_wait);
+ else
+ kfree(isert_conn);
}
static void
@@ -753,6 +758,7 @@ static int
isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
struct isert_np *isert_np = cma_id->context;
+ struct isert_conn *isert_conn;
int ret = 0;
isert_info("%s (%d): status %d id %p np %p\n",
@@ -773,10 +779,21 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
break;
case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */
case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */
- case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */
ret = isert_disconnected_handler(cma_id, event->event);
break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ isert_conn = cma_id->qp->qp_context;
+ isert_conn->dev_removed = true;
+ isert_disconnected_handler(cma_id, event->event);
+ wait_event_interruptible(isert_conn->rem_wait,
+ isert_conn->state == ISER_CONN_DOWN);
+ kfree(isert_conn);
+ /*
+ * return non-zero from the callback to destroy
+ * the rdma cm id
+ */
+ return 1;
case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */
case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */
case RDMA_CM_EVENT_CONNECT_ERROR:
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index fc791ef..c02ada5 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -158,6 +158,8 @@ struct isert_conn {
struct work_struct release_work;
bool logout_posted;
bool snd_w_inv;
+ wait_queue_head_t rem_wait;
+ bool dev_removed;
};
#define ISERT_MAX_CQ 64