summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c102
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c185
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h17
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c125
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c345
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c67
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h38
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c314
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c8
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c78
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h12
-rw-r--r--drivers/infiniband/hw/cxgb4/user.h2
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c18
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c5
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c171
-rw-r--r--drivers/infiniband/hw/mlx4/main.c177
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h47
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c171
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c632
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c16
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c32
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c37
-rw-r--r--drivers/infiniband/hw/mlx5/main.c917
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h214
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c201
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c29
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c1253
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c54
-rw-r--r--drivers/infiniband/hw/mlx5/user.h63
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c84
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c84
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c17
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h6
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c386
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h18
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c25
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c105
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c83
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h49
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c16
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c346
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h10
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c41
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c97
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c48
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c38
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c20
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c20
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c41
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h23
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs_mcast.c35
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c5
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c9
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c12
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c24
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c54
72 files changed, 4300 insertions, 2790 deletions
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index cb78b1e..f504ba7 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en
error = l2t_send(tdev, skb, l2e);
if (error < 0)
kfree_skb(skb);
- return error;
+ return error < 0 ? error : 0;
}
int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
@@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
error = cxgb3_ofld_send(tdev, skb);
if (error < 0)
kfree_skb(skb);
- return error;
+ return error < 0 ? error : 0;
}
static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index cf5474a..97fbfd2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -115,15 +115,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
case T3_SEND_WITH_SE_INV:
wc->opcode = IB_WC_SEND;
break;
- case T3_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
-
case T3_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
break;
case T3_FAST_REGISTER:
- wc->opcode = IB_WC_FAST_REG_MR;
+ wc->opcode = IB_WC_REG_MR;
break;
default:
printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 5c36ee2..1d04c87 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -75,37 +75,6 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
return ret;
}
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- int npages)
-{
- u32 stag;
- int ret;
-
- /* We could support this... */
- if (npages > mhp->attr.pbl_size)
- return -ENOMEM;
-
- stag = mhp->attr.stag;
- if (cxio_reregister_phys_mem(&rhp->rdev,
- &stag, mhp->attr.pdid,
- mhp->attr.perms,
- mhp->attr.zbva,
- mhp->attr.va_fbo,
- mhp->attr.len,
- shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr))
- return -ENOMEM;
-
- ret = iwch_finish_mem_reg(mhp, stag);
- if (ret)
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-
- return ret;
-}
-
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
{
mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
@@ -130,74 +99,3 @@ int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
return cxio_write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (offset << 3), npages);
}
-
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- u64 *iova_start,
- u64 *total_size,
- int *npages,
- int *shift,
- __be64 **page_list)
-{
- u64 mask;
- int i, j, n;
-
- mask = 0;
- *total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return -EINVAL;
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return -EINVAL;
- *total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
- else
- mask |= buffer_list[i].addr & PAGE_MASK;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
- else
- mask |= (buffer_list[i].addr + buffer_list[i].size +
- PAGE_SIZE - 1) & PAGE_MASK;
- }
-
- if (*total_size > 0xFFFFFFFFULL)
- return -ENOMEM;
-
- /* Find largest page shift we can use to cover buffers */
- for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
- if ((1ULL << *shift) & mask)
- break;
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
- buffer_list[0].addr &= ~0ull << *shift;
-
- *npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- *npages += (buffer_list[i].size +
- (1ULL << *shift) - 1) >> *shift;
-
- if (!*npages)
- return -EINVAL;
-
- *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
- if (!*page_list)
- return -ENOMEM;
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
- ++j)
- (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
- ((u64) j << *shift));
-
- PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, (unsigned long long) *iova_start,
- (unsigned long long) mask, *shift, (unsigned long long) *total_size,
- *npages);
-
- return 0;
-
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 93308c4..2734820 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -458,11 +458,9 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
u32 mmid;
PDBG("%s ib_mr %p\n", __func__, ib_mr);
- /* There can be no memory windows */
- if (atomic_read(&ib_mr->usecnt))
- return -EINVAL;
mhp = to_iwch_mr(ib_mr);
+ kfree(mhp->pages);
rhp = mhp->rhp;
mmid = mhp->attr.stag >> 8;
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
@@ -478,24 +476,25 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
return 0;
}
-static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start)
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
{
- __be64 *page_list;
- int shift;
- u64 total_size;
- int npages;
- struct iwch_dev *rhp;
- struct iwch_pd *php;
+ const u64 total_size = 0xffffffff;
+ const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
+ struct iwch_pd *php = to_iwch_pd(pd);
+ struct iwch_dev *rhp = php->rhp;
struct iwch_mr *mhp;
- int ret;
+ __be64 *page_list;
+ int shift = 26, npages, ret, i;
PDBG("%s ib_pd %p\n", __func__, pd);
- php = to_iwch_pd(pd);
- rhp = php->rhp;
+
+ /*
+ * T3 only supports 32 bits of size.
+ */
+ if (sizeof(phys_addr_t) > 4) {
+ pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+ return ERR_PTR(-ENOTSUPP);
+ }
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
@@ -503,22 +502,23 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->rhp = rhp;
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ npages = (total_size + (1ULL << shift) - 1) >> shift;
+ if (!npages) {
ret = -EINVAL;
goto err;
}
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
- ret = -EINVAL;
+ page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
+ if (!page_list) {
+ ret = -ENOMEM;
goto err;
}
- ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
- &total_size, &npages, &shift, &page_list);
- if (ret)
- goto err;
+ for (i = 0; i < npages; i++)
+ page_list[i] = cpu_to_be64((u64)i << shift);
+
+ PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, mask, shift, total_size, npages);
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
@@ -535,7 +535,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = *iova_start;
+ mhp->attr.va_fbo = 0;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) total_size;
@@ -552,76 +552,8 @@ err_pbl:
err:
kfree(mhp);
return ERR_PTR(ret);
-
}
-static int iwch_reregister_phys_mem(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc, u64 * iova_start)
-{
-
- struct iwch_mr mh, *mhp;
- struct iwch_pd *php;
- struct iwch_dev *rhp;
- __be64 *page_list = NULL;
- int shift = 0;
- u64 total_size;
- int npages = 0;
- int ret;
-
- PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
- /* There can be no memory windows */
- if (atomic_read(&mr->usecnt))
- return -EINVAL;
-
- mhp = to_iwch_mr(mr);
- rhp = mhp->rhp;
- php = to_iwch_pd(mr->pd);
-
- /* make sure we are on the same adapter */
- if (rhp != php->rhp)
- return -EINVAL;
-
- memcpy(&mh, mhp, sizeof *mhp);
-
- if (mr_rereg_mask & IB_MR_REREG_PD)
- php = to_iwch_pd(pd);
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mh.attr.perms = iwch_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- ret = build_phys_page_list(buffer_list, num_phys_buf,
- iova_start,
- &total_size, &npages,
- &shift, &page_list);
- if (ret)
- return ret;
- }
-
- ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
- kfree(page_list);
- if (ret) {
- return ret;
- }
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mhp->attr.pdid = php->pdid;
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- mhp->attr.zbva = 0;
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- }
-
- return 0;
-}
-
-
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
@@ -725,28 +657,6 @@ err:
return ERR_PTR(err);
}
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct ib_phys_buf bl;
- u64 kva;
- struct ib_mr *ibmr;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
-
- /*
- * T3 only supports 32 bits of size.
- */
- if (sizeof(phys_addr_t) > 4) {
- pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
- return ERR_PTR(-ENOTSUPP);
- }
- bl.size = 0xffffffff;
- bl.addr = 0;
- kva = 0;
- ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
- return ibmr;
-}
-
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct iwch_dev *rhp;
@@ -821,6 +731,12 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
if (!mhp)
goto err;
+ mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mhp->pages) {
+ ret = -ENOMEM;
+ goto pl_err;
+ }
+
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, max_num_sg);
if (ret)
@@ -847,31 +763,34 @@ err3:
err2:
iwch_free_pbl(mhp);
err1:
+ kfree(mhp->pages);
+pl_err:
kfree(mhp);
err:
return ERR_PTR(ret);
}
-static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
- struct ib_device *device,
- int page_list_len)
+static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
{
- struct ib_fast_reg_page_list *page_list;
+ struct iwch_mr *mhp = to_iwch_mr(ibmr);
- page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64),
- GFP_KERNEL);
- if (!page_list)
- return ERR_PTR(-ENOMEM);
+ if (unlikely(mhp->npages == mhp->attr.pbl_size))
+ return -ENOMEM;
- page_list->page_list = (u64 *)(page_list + 1);
- page_list->max_page_list_len = page_list_len;
+ mhp->pages[mhp->npages++] = addr;
- return page_list;
+ return 0;
}
-static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list)
+static int iwch_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- kfree(page_list);
+ struct iwch_mr *mhp = to_iwch_mr(ibmr);
+
+ mhp->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page);
}
static int iwch_destroy_qp(struct ib_qp *ib_qp)
@@ -1442,16 +1361,12 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.resize_cq = iwch_resize_cq;
dev->ibdev.poll_cq = iwch_poll_cq;
dev->ibdev.get_dma_mr = iwch_get_dma_mr;
- dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
- dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
dev->ibdev.reg_user_mr = iwch_reg_user_mr;
dev->ibdev.dereg_mr = iwch_dereg_mr;
dev->ibdev.alloc_mw = iwch_alloc_mw;
- dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
dev->ibdev.alloc_mr = iwch_alloc_mr;
- dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
- dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
+ dev->ibdev.map_mr_sg = iwch_map_mr_sg;
dev->ibdev.attach_mcast = iwch_multicast_attach;
dev->ibdev.detach_mcast = iwch_multicast_detach;
dev->ibdev.process_mad = iwch_process_mad;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 87c14b0..252c464 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -77,6 +77,8 @@ struct iwch_mr {
struct iwch_dev *rhp;
u64 kva;
struct tpt_attributes attr;
+ u64 *pages;
+ u32 npages;
};
typedef struct iwch_mw iwch_mw_handle;
@@ -328,9 +330,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int iwch_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
int iwch_post_zb_read(struct iwch_ep *ep);
@@ -339,21 +338,9 @@ void iwch_unregister_device(struct iwch_dev *dev);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift);
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- int npages);
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
void iwch_free_pbl(struct iwch_mr *mhp);
int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- u64 *iova_start,
- u64 *total_size,
- int *npages,
- int *shift,
- __be64 **page_list);
-
#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index b57c0be..d939980 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -95,8 +95,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
wqe->write.reserved[0] = 0;
wqe->write.reserved[1] = 0;
wqe->write.reserved[2] = 0;
- wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
- wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+ wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
+ wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
plen = 4;
@@ -137,8 +137,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
wqe->read.local_inv = 0;
wqe->read.reserved[0] = 0;
wqe->read.reserved[1] = 0;
- wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
- wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
+ wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey);
+ wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr);
wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
@@ -146,27 +146,28 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
+ u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
{
+ struct iwch_mr *mhp = to_iwch_mr(wr->mr);
int i;
__be64 *p;
- if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH)
+ if (mhp->npages > T3_MAX_FASTREG_DEPTH)
return -EINVAL;
*wr_cnt = 1;
- wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
- wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length);
- wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
+ wqe->fastreg.stag = cpu_to_be32(wr->key);
+ wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
+ wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
wqe->fastreg.va_base_lo_fbo =
- cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff);
+ cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
wqe->fastreg.page_type_perms = cpu_to_be32(
- V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) |
- V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) |
+ V_FR_PAGE_COUNT(mhp->npages) |
+ V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) |
V_FR_TYPE(TPT_VATO) |
- V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags)));
+ V_FR_PERMS(iwch_ib_to_tpt_access(wr->access)));
p = &wqe->fastreg.pbl_addrs[0];
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) {
+ for (i = 0; i < mhp->npages; i++, p++) {
/* If we need a 2nd WR, then set it up */
if (i == T3_MAX_FASTREG_FRAG) {
@@ -175,14 +176,14 @@ static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
Q_PTR2IDX((wq->wptr+1), wq->size_log2));
build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
Q_GENBIT(wq->wptr + 1, wq->size_log2),
- 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG,
+ 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
T3_EOP);
p = &wqe->pbl_frag.pbl_addrs[0];
}
- *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
+ *p = cpu_to_be64((u64)mhp->pages[i]);
}
- *flit_cnt = 5 + wr->wr.fast_reg.page_list_len;
+ *flit_cnt = 5 + mhp->npages;
if (*flit_cnt > 15)
*flit_cnt = 15;
return 0;
@@ -414,10 +415,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (!qhp->wq.oldest_read)
qhp->wq.oldest_read = sqp;
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
t3_wr_opcode = T3_WR_FASTREG;
- err = build_fastreg(wqe, wr, &t3_wr_flit_cnt,
- &wr_cnt, &qhp->wq);
+ err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt,
+ &wr_cnt, &qhp->wq);
break;
case IB_WR_LOCAL_INV:
if (wr->send_flags & IB_SEND_FENCE)
@@ -525,88 +526,6 @@ out:
return err;
}
-int iwch_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct iwch_dev *rhp;
- struct iwch_mw *mhp;
- struct iwch_qp *qhp;
- union t3_wr *wqe;
- u32 pbl_addr;
- u8 page_size;
- u32 num_wrs;
- unsigned long flag;
- struct ib_sge sgl;
- int err=0;
- enum t3_wr_flags t3_wr_flags;
- u32 idx;
- struct t3_swsq *sqp;
-
- qhp = to_iwch_qp(qp);
- mhp = to_iwch_mw(mw);
- rhp = qhp->rhp;
-
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
- }
- num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
- qhp->wq.sq_size_log2);
- if (num_wrs == 0) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return -ENOMEM;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
- mw, mw_bind);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
-
- t3_wr_flags = 0;
- if (mw_bind->send_flags & IB_SEND_SIGNALED)
- t3_wr_flags = T3_COMPLETION_FLAG;
-
- sgl.addr = mw_bind->bind_info.addr;
- sgl.lkey = mw_bind->bind_info.mr->lkey;
- sgl.length = mw_bind->bind_info.length;
- wqe->bind.reserved = 0;
- wqe->bind.type = TPT_VATO;
-
- /* TBD: check perms */
- wqe->bind.perms = iwch_ib_to_tpt_bind_access(
- mw_bind->bind_info.mw_access_flags);
- wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
- wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
- wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
- wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
- err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
- if (err) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return err;
- }
- wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
- sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
- sqp->wr_id = mw_bind->wr_id;
- sqp->opcode = T3_BIND_MW;
- sqp->sq_wptr = qhp->wq.sq_wptr;
- sqp->complete = 0;
- sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
- wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
- wqe->bind.mr_pagesz = page_size;
- build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
- sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
- ++(qhp->wq.wptr);
- ++(qhp->wq.sq_wptr);
- spin_unlock_irqrestore(&qhp->lock, flag);
-
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
- return err;
-}
-
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
u8 *layer_type, u8 *ecode)
{
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 9a389a0..cd2ff5f 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -632,22 +632,18 @@ static void best_mtu(const unsigned short *mtus, unsigned short mtu,
static int send_connect(struct c4iw_ep *ep)
{
- struct cpl_act_open_req *req;
- struct cpl_t5_act_open_req *t5_req;
- struct cpl_act_open_req6 *req6;
- struct cpl_t5_act_open_req6 *t5_req6;
+ struct cpl_act_open_req *req = NULL;
+ struct cpl_t5_act_open_req *t5req = NULL;
+ struct cpl_t6_act_open_req *t6req = NULL;
+ struct cpl_act_open_req6 *req6 = NULL;
+ struct cpl_t5_act_open_req6 *t5req6 = NULL;
+ struct cpl_t6_act_open_req6 *t6req6 = NULL;
struct sk_buff *skb;
u64 opt0;
u32 opt2;
unsigned int mtu_idx;
int wscale;
- int wrlen;
- int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
- sizeof(struct cpl_act_open_req) :
- sizeof(struct cpl_t5_act_open_req);
- int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
- sizeof(struct cpl_act_open_req6) :
- sizeof(struct cpl_t5_act_open_req6);
+ int win, sizev4, sizev6, wrlen;
struct sockaddr_in *la = (struct sockaddr_in *)
&ep->com.mapped_local_addr;
struct sockaddr_in *ra = (struct sockaddr_in *)
@@ -656,8 +652,28 @@ static int send_connect(struct c4iw_ep *ep)
&ep->com.mapped_local_addr;
struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
&ep->com.mapped_remote_addr;
- int win;
int ret;
+ enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+
+ switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+ case CHELSIO_T4:
+ sizev4 = sizeof(struct cpl_act_open_req);
+ sizev6 = sizeof(struct cpl_act_open_req6);
+ break;
+ case CHELSIO_T5:
+ sizev4 = sizeof(struct cpl_t5_act_open_req);
+ sizev6 = sizeof(struct cpl_t5_act_open_req6);
+ break;
+ case CHELSIO_T6:
+ sizev4 = sizeof(struct cpl_t6_act_open_req);
+ sizev6 = sizeof(struct cpl_t6_act_open_req6);
+ break;
+ default:
+ pr_err("T%d Chip is not supported\n",
+ CHELSIO_CHIP_VERSION(adapter_type));
+ return -EINVAL;
+ }
wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
roundup(sizev4, 16) :
@@ -706,7 +722,10 @@ static int send_connect(struct c4iw_ep *ep)
opt2 |= SACK_EN_F;
if (wscale && enable_tcp_window_scaling)
opt2 |= WND_SCALE_EN_F;
- if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
+ if (peer2peer)
+ isn += 4;
+
opt2 |= T5_OPT_2_VALID_F;
opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
opt2 |= T5_ISS_F;
@@ -718,102 +737,109 @@ static int send_connect(struct c4iw_ep *ep)
t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
- if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- if (ep->com.remote_addr.ss_family == AF_INET) {
- req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
+ if (ep->com.remote_addr.ss_family == AF_INET) {
+ switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+ case CHELSIO_T4:
+ req = (struct cpl_act_open_req *)skb_put(skb, wrlen);
INIT_TP_WR(req, 0);
- OPCODE_TID(req) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
- ((ep->rss_qid << 14) | ep->atid)));
- req->local_port = la->sin_port;
- req->peer_port = ra->sin_port;
- req->local_ip = la->sin_addr.s_addr;
- req->peer_ip = ra->sin_addr.s_addr;
- req->opt0 = cpu_to_be64(opt0);
+ break;
+ case CHELSIO_T5:
+ t5req = (struct cpl_t5_act_open_req *)skb_put(skb,
+ wrlen);
+ INIT_TP_WR(t5req, 0);
+ req = (struct cpl_act_open_req *)t5req;
+ break;
+ case CHELSIO_T6:
+ t6req = (struct cpl_t6_act_open_req *)skb_put(skb,
+ wrlen);
+ INIT_TP_WR(t6req, 0);
+ req = (struct cpl_act_open_req *)t6req;
+ t5req = (struct cpl_t5_act_open_req *)t6req;
+ break;
+ default:
+ pr_err("T%d Chip is not supported\n",
+ CHELSIO_CHIP_VERSION(adapter_type));
+ ret = -EINVAL;
+ goto clip_release;
+ }
+
+ OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+ ((ep->rss_qid<<14) | ep->atid)));
+ req->local_port = la->sin_port;
+ req->peer_port = ra->sin_port;
+ req->local_ip = la->sin_addr.s_addr;
+ req->peer_ip = ra->sin_addr.s_addr;
+ req->opt0 = cpu_to_be64(opt0);
+
+ if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
req->params = cpu_to_be32(cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t));
req->opt2 = cpu_to_be32(opt2);
} else {
+ t5req->params = cpu_to_be64(FILTER_TUPLE_V(
+ cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t)));
+ t5req->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
+ t5req->opt2 = cpu_to_be32(opt2);
+ }
+ } else {
+ switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+ case CHELSIO_T4:
req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
-
INIT_TP_WR(req6, 0);
- OPCODE_TID(req6) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
- ((ep->rss_qid<<14)|ep->atid)));
- req6->local_port = la6->sin6_port;
- req6->peer_port = ra6->sin6_port;
- req6->local_ip_hi = *((__be64 *)
- (la6->sin6_addr.s6_addr));
- req6->local_ip_lo = *((__be64 *)
- (la6->sin6_addr.s6_addr + 8));
- req6->peer_ip_hi = *((__be64 *)
- (ra6->sin6_addr.s6_addr));
- req6->peer_ip_lo = *((__be64 *)
- (ra6->sin6_addr.s6_addr + 8));
- req6->opt0 = cpu_to_be64(opt0);
+ break;
+ case CHELSIO_T5:
+ t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb,
+ wrlen);
+ INIT_TP_WR(t5req6, 0);
+ req6 = (struct cpl_act_open_req6 *)t5req6;
+ break;
+ case CHELSIO_T6:
+ t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb,
+ wrlen);
+ INIT_TP_WR(t6req6, 0);
+ req6 = (struct cpl_act_open_req6 *)t6req6;
+ t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
+ break;
+ default:
+ pr_err("T%d Chip is not supported\n",
+ CHELSIO_CHIP_VERSION(adapter_type));
+ ret = -EINVAL;
+ goto clip_release;
+ }
+
+ OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+ ((ep->rss_qid<<14)|ep->atid)));
+ req6->local_port = la6->sin6_port;
+ req6->peer_port = ra6->sin6_port;
+ req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
+ req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
+ req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
+ req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
+ req6->opt0 = cpu_to_be64(opt0);
+
+ if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
req6->params = cpu_to_be32(cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t));
req6->opt2 = cpu_to_be32(opt2);
- }
- } else {
- u32 isn = (prandom_u32() & ~7UL) - 1;
-
- if (peer2peer)
- isn += 4;
-
- if (ep->com.remote_addr.ss_family == AF_INET) {
- t5_req = (struct cpl_t5_act_open_req *)
- skb_put(skb, wrlen);
- INIT_TP_WR(t5_req, 0);
- OPCODE_TID(t5_req) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
- ((ep->rss_qid << 14) | ep->atid)));
- t5_req->local_port = la->sin_port;
- t5_req->peer_port = ra->sin_port;
- t5_req->local_ip = la->sin_addr.s_addr;
- t5_req->peer_ip = ra->sin_addr.s_addr;
- t5_req->opt0 = cpu_to_be64(opt0);
- t5_req->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t)));
- t5_req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__,
- be32_to_cpu(t5_req->rsvd));
- t5_req->opt2 = cpu_to_be32(opt2);
} else {
- t5_req6 = (struct cpl_t5_act_open_req6 *)
- skb_put(skb, wrlen);
- INIT_TP_WR(t5_req6, 0);
- OPCODE_TID(t5_req6) = cpu_to_be32(
- MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
- ((ep->rss_qid<<14)|ep->atid)));
- t5_req6->local_port = la6->sin6_port;
- t5_req6->peer_port = ra6->sin6_port;
- t5_req6->local_ip_hi = *((__be64 *)
- (la6->sin6_addr.s6_addr));
- t5_req6->local_ip_lo = *((__be64 *)
- (la6->sin6_addr.s6_addr + 8));
- t5_req6->peer_ip_hi = *((__be64 *)
- (ra6->sin6_addr.s6_addr));
- t5_req6->peer_ip_lo = *((__be64 *)
- (ra6->sin6_addr.s6_addr + 8));
- t5_req6->opt0 = cpu_to_be64(opt0);
- t5_req6->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
+ t5req6->params = cpu_to_be64(FILTER_TUPLE_V(
+ cxgb4_select_ntuple(
ep->com.dev->rdev.lldi.ports[0],
ep->l2t)));
- t5_req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__,
- be32_to_cpu(t5_req6->rsvd));
- t5_req6->opt2 = cpu_to_be32(opt2);
+ t5req6->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
+ t5req6->opt2 = cpu_to_be32(opt2);
}
}
set_bit(ACT_OPEN_REQ, &ep->com.history);
ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+clip_release:
if (ret && ep->com.remote_addr.ss_family == AF_INET6)
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&la6->sin6_addr.s6_addr, 1);
@@ -1196,6 +1222,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
if ((status == 0) || (status == -ECONNREFUSED)) {
if (!ep->tried_with_mpa_v1) {
/* this means MPA_v2 is used */
+ event.ord = ep->ird;
+ event.ird = ep->ord;
event.private_data_len = ep->plen -
sizeof(struct mpa_v2_conn_params);
event.private_data = ep->mpa_pkt +
@@ -1203,6 +1231,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
sizeof(struct mpa_v2_conn_params);
} else {
/* this means MPA_v1 is used */
+ event.ord = cur_max_read_depth(ep->com.dev);
+ event.ird = cur_max_read_depth(ep->com.dev);
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt +
sizeof(struct mpa_message);
@@ -1265,8 +1295,8 @@ static void established_upcall(struct c4iw_ep *ep)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
- event.ird = ep->ird;
- event.ord = ep->ord;
+ event.ird = ep->ord;
+ event.ord = ep->ird;
if (ep->com.cm_id) {
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1898,7 +1928,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
struct dst_entry *dst, struct c4iw_dev *cdev,
- bool clear_mpa_v1)
+ bool clear_mpa_v1, enum chip_type adapter_type)
{
struct neighbour *n;
int err, step;
@@ -1933,7 +1963,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
goto out;
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+ ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
+ cxgb4_port_viid(pdev));
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -1952,7 +1983,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
goto out;
ep->mtu = dst_mtu(dst);
ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+ ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
+ cxgb4_port_viid(pdev));
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2025,7 +2057,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
err = -EHOSTUNREACH;
goto fail3;
}
- err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false);
+ err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
+ ep->com.dev->rdev.lldi.adapter_type);
if (err) {
pr_err("%s - cannot alloc l2e.\n", __func__);
goto fail4;
@@ -2213,13 +2246,14 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
int wscale;
struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
int win;
+ enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(skb_cloned(skb));
skb_get(skb);
rpl = cplhdr(skb);
- if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ if (!is_t4(adapter_type)) {
skb_trim(skb, roundup(sizeof(*rpl5), 16));
rpl5 = (void *)rpl;
INIT_TP_WR(rpl5, ep->hwtid);
@@ -2266,12 +2300,16 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
const struct tcphdr *tcph;
u32 hlen = ntohl(req->hdr_len);
- tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
- IP_HDR_LEN_G(hlen);
+ if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
+ tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
+ IP_HDR_LEN_G(hlen);
+ else
+ tcph = (const void *)(req + 1) +
+ T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN_V(1);
}
- if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
u32 isn = (prandom_u32() & ~7UL) - 1;
opt2 |= T5_OPT_2_VALID_F;
opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
@@ -2302,12 +2340,16 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
return;
}
-static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype,
- __u8 *local_ip, __u8 *peer_ip,
+static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
+ int *iptype, __u8 *local_ip, __u8 *peer_ip,
__be16 *local_port, __be16 *peer_port)
{
- int eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- int ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+ int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+ ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+ T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+ int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+ IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+ T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
struct tcphdr *tcp = (struct tcphdr *)
@@ -2362,7 +2404,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port);
+ get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
+ local_ip, peer_ip, &local_port, &peer_port);
/* Find output route */
if (iptype == 4) {
@@ -2397,7 +2440,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- err = import_ep(child_ep, iptype, peer_ip, dst, dev, false);
+ err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
+ parent_ep->com.dev->rdev.lldi.adapter_type);
if (err) {
printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
__func__);
@@ -2929,7 +2973,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
} else {
if (peer2peer &&
(ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
- (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0)
+ (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
ep->ird = 1;
}
@@ -3189,7 +3233,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto fail2;
}
- err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
+ err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
+ ep->com.dev->rdev.lldi.adapter_type);
if (err) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
goto fail3;
@@ -3226,6 +3271,12 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
&ep->com.mapped_local_addr;
+ if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
+ err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
+ (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+ if (err)
+ return err;
+ }
c4iw_init_wr_wait(&ep->com.wr_wait);
err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
ep->stid, &sin6->sin6_addr,
@@ -3237,13 +3288,13 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
0, 0, __func__);
else if (err > 0)
err = net_xmit_errno(err);
- if (err)
+ if (err) {
+ cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
+ (const u32 *)&sin6->sin6_addr.s6_addr, 1);
pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
err, ep->stid,
sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
- else
- cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
- (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+ }
return err;
}
@@ -3260,6 +3311,10 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
sin->sin_addr.s_addr, sin->sin_port, 0,
ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
if (err == -EBUSY) {
+ if (c4iw_fatal_error(&ep->com.dev->rdev)) {
+ err = -EIO;
+ break;
+ }
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(usecs_to_jiffies(100));
}
@@ -3593,20 +3648,23 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
{
- u32 l2info;
- u16 vlantag, len, hdr_len, eth_hdr_len;
+ __be32 l2info;
+ __be16 hdr_len, vlantag, len;
+ u16 eth_hdr_len;
+ int tcp_hdr_len, ip_hdr_len;
u8 intf;
struct cpl_rx_pkt *cpl = cplhdr(skb);
struct cpl_pass_accept_req *req;
struct tcp_options_received tmp_opt;
struct c4iw_dev *dev;
+ enum chip_type type;
dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
/* Store values from cpl_rx_pkt in temporary location. */
- vlantag = (__force u16) cpl->vlan;
- len = (__force u16) cpl->len;
- l2info = (__force u32) cpl->l2info;
- hdr_len = (__force u16) cpl->hdr_len;
+ vlantag = cpl->vlan;
+ len = cpl->len;
+ l2info = cpl->l2info;
+ hdr_len = cpl->hdr_len;
intf = cpl->iff;
__skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
@@ -3623,20 +3681,28 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
memset(req, 0, sizeof(*req));
req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
SYN_MAC_IDX_V(RX_MACIDX_G(
- (__force int) htonl(l2info))) |
+ be32_to_cpu(l2info))) |
SYN_XACT_MATCH_F);
- eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
- RX_ETHHDR_LEN_G((__force int)htonl(l2info)) :
- RX_T5_ETHHDR_LEN_G((__force int)htonl(l2info));
- req->hdr_len = cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(
- (__force int) htonl(l2info))) |
- TCP_HDR_LEN_V(RX_TCPHDR_LEN_G(
- (__force int) htons(hdr_len))) |
- IP_HDR_LEN_V(RX_IPHDR_LEN_G(
- (__force int) htons(hdr_len))) |
- ETH_HDR_LEN_V(RX_ETHHDR_LEN_G(eth_hdr_len)));
- req->vlan = (__force __be16) vlantag;
- req->len = (__force __be16) len;
+ type = dev->rdev.lldi.adapter_type;
+ tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
+ ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
+ req->hdr_len =
+ cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
+ if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
+ eth_hdr_len = is_t4(type) ?
+ RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
+ RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
+ req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
+ IP_HDR_LEN_V(ip_hdr_len) |
+ ETH_HDR_LEN_V(eth_hdr_len));
+ } else { /* T6 and later */
+ eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
+ req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
+ T6_IP_HDR_LEN_V(ip_hdr_len) |
+ T6_ETH_HDR_LEN_V(eth_hdr_len));
+ }
+ req->vlan = vlantag;
+ req->len = len;
req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
PASS_OPEN_TOS_V(tos));
req->tcpopt.mss = htons(tmp_opt.mss_clamp);
@@ -3755,9 +3821,22 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
- RX_ETHHDR_LEN_G(htonl(cpl->l2info)) :
- RX_T5_ETHHDR_LEN_G(htonl(cpl->l2info));
+ switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
+ case CHELSIO_T4:
+ eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+ break;
+ case CHELSIO_T5:
+ eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+ break;
+ case CHELSIO_T6:
+ eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+ break;
+ default:
+ pr_err("T%d Chip is not supported\n",
+ CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
+ goto reject;
+ }
+
if (eth_hdr_len == ETH_HLEN) {
eh = (struct ethhdr *)(req + 1);
iph = (struct iphdr *)(eh + 1);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 92d5183..cf21df4 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -744,15 +744,12 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
case FW_RI_SEND_WITH_SE:
wc->opcode = IB_WC_SEND;
break;
- case FW_RI_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case FW_RI_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
break;
case FW_RI_FAST_REGISTER:
- wc->opcode = IB_WC_FAST_REG_MR;
+ wc->opcode = IB_WC_REG_MR;
break;
default:
printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 1a29739..8024ea4 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -315,14 +315,12 @@ static int qp_release(struct inode *inode, struct file *file)
static int qp_open(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *qpd;
- int ret = 0;
int count = 1;
qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
- if (!qpd) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!qpd)
+ return -ENOMEM;
+
qpd->devp = inode->i_private;
qpd->pos = 0;
@@ -333,8 +331,8 @@ static int qp_open(struct inode *inode, struct file *file)
qpd->bufsize = count * 128;
qpd->buf = vmalloc(qpd->bufsize);
if (!qpd->buf) {
- ret = -ENOMEM;
- goto err1;
+ kfree(qpd);
+ return -ENOMEM;
}
spin_lock_irq(&qpd->devp->lock);
@@ -343,11 +341,7 @@ static int qp_open(struct inode *inode, struct file *file)
qpd->buf[qpd->pos++] = 0;
file->private_data = qpd;
- goto out;
-err1:
- kfree(qpd);
-out:
- return ret;
+ return 0;
}
static const struct file_operations qp_debugfs_fops = {
@@ -781,8 +775,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
rdev->lldi.ucq_density);
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
@@ -791,8 +784,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
rdev->lldi.vr->cq.size);
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
rdev->qpmask = rdev->lldi.udb_density - 1;
@@ -816,10 +808,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->lldi.db_reg, rdev->lldi.gts_reg,
rdev->qpmask, rdev->cqmask);
- if (c4iw_num_stags(rdev) == 0) {
- err = -EINVAL;
- goto err1;
- }
+ if (c4iw_num_stags(rdev) == 0)
+ return -EINVAL;
rdev->stats.pd.total = T4_MAX_NUM_PD;
rdev->stats.stag.total = rdev->lldi.vr->stag.size;
@@ -831,29 +821,31 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
if (err) {
printk(KERN_ERR MOD "error %d initializing resources\n", err);
- goto err1;
+ return err;
}
err = c4iw_pblpool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
- goto err2;
+ goto destroy_resource;
}
err = c4iw_rqtpool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
- goto err3;
+ goto destroy_pblpool;
}
err = c4iw_ocqp_pool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
- goto err4;
+ goto destroy_rqtpool;
}
rdev->status_page = (struct t4_dev_status_page *)
__get_free_page(GFP_KERNEL);
- if (!rdev->status_page) {
- pr_err(MOD "error allocating status page\n");
- goto err4;
- }
+ if (!rdev->status_page)
+ goto destroy_ocqp_pool;
+ rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
+ rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
+ rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
+ rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
if (c4iw_wr_log) {
rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
@@ -869,13 +861,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->status_page->db_off = 0;
return 0;
-err4:
+destroy_ocqp_pool:
+ c4iw_ocqp_pool_destroy(rdev);
+destroy_rqtpool:
c4iw_rqtpool_destroy(rdev);
-err3:
+destroy_pblpool:
c4iw_pblpool_destroy(rdev);
-err2:
+destroy_resource:
c4iw_destroy_resource(&rdev->resource);
-err1:
return err;
}
@@ -962,12 +955,12 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.lldi.sge_egrstatuspagesize;
/*
- * For T5 devices, we map all of BAR2 with WC.
+ * For T5/T6 devices, we map all of BAR2 with WC.
* For T4 devices with onchip qp mem, we map only that part
* of BAR2 with WC.
*/
devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
- if (is_t5(devp->rdev.lldi.adapter_type)) {
+ if (!is_t4(devp->rdev.lldi.adapter_type)) {
devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
pci_resource_len(devp->rdev.lldi.pdev, 2));
if (!devp->rdev.bar2_kva) {
@@ -1267,11 +1260,9 @@ static int enable_qp_db(int id, void *p, void *data)
static void resume_rc_qp(struct c4iw_qp *qp)
{
spin_lock(&qp->lock);
- t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
- is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL);
qp->wq.sq.wq_pidx_inc = 0;
- t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
- is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL);
qp->wq.rq.wq_pidx_inc = 0;
spin_unlock(&qp->lock);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index c7bb38c..fb2de75 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -386,6 +386,10 @@ struct c4iw_mr {
struct c4iw_dev *rhp;
u64 kva;
struct tpt_attributes attr;
+ u64 *mpl;
+ dma_addr_t mpl_addr;
+ u32 max_mpl_len;
+ u32 mpl_len;
};
static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
@@ -405,20 +409,6 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
return container_of(ibmw, struct c4iw_mw, ibmw);
}
-struct c4iw_fr_page_list {
- struct ib_fast_reg_page_list ibpl;
- DEFINE_DMA_UNMAP_ADDR(mapping);
- dma_addr_t dma_addr;
- struct c4iw_dev *dev;
- int pll_len;
-};
-
-static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
- struct ib_fast_reg_page_list *ibpl)
-{
- return container_of(ibpl, struct c4iw_fr_page_list, ibpl);
-}
-
struct c4iw_cq {
struct ib_cq ibcq;
struct c4iw_dev *rhp;
@@ -957,8 +947,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -966,30 +954,18 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
void c4iw_qp_add_ref(struct ib_qp *qp);
void c4iw_qp_rem_ref(struct ib_qp *qp);
-void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list);
-struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
- struct ib_device *device,
- int page_list_len);
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
+int c4iw_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
int c4iw_dealloc_mw(struct ib_mw *mw);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start);
-int c4iw_reregister_phys_mem(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc, u64 *iova_start);
int c4iw_dereg_mr(struct ib_mr *ib_mr);
int c4iw_destroy_cq(struct ib_cq *ib_cq);
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 026b91e..7849890 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
if (i == (num_wqe-1)) {
req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
FW_WR_COMPL_F);
- req->wr.wr_lo = (__force __be64)&wr_wait;
+ req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait;
} else
req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
req->wr.wr_mid = cpu_to_be32(
@@ -392,32 +392,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
return ret;
}
-static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
- struct c4iw_mr *mhp, int shift, int npages)
-{
- u32 stag;
- int ret;
-
- if (npages > mhp->attr.pbl_size)
- return -ENOMEM;
-
- stag = mhp->attr.stag;
- ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
- FW_RI_STAG_NSMR, mhp->attr.perms,
- mhp->attr.mw_bind_enable, mhp->attr.zbva,
- mhp->attr.va_fbo, mhp->attr.len, shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret)
- return ret;
-
- ret = finish_mem_reg(mhp, stag);
- if (ret)
- dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-
- return ret;
-}
-
static int alloc_pbl(struct c4iw_mr *mhp, int npages)
{
mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
@@ -431,228 +405,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages)
return 0;
}
-static int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf, u64 *iova_start,
- u64 *total_size, int *npages,
- int *shift, __be64 **page_list)
-{
- u64 mask;
- int i, j, n;
-
- mask = 0;
- *total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return -EINVAL;
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return -EINVAL;
- *total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
- else
- mask |= buffer_list[i].addr & PAGE_MASK;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
- else
- mask |= (buffer_list[i].addr + buffer_list[i].size +
- PAGE_SIZE - 1) & PAGE_MASK;
- }
-
- if (*total_size > 0xFFFFFFFFULL)
- return -ENOMEM;
-
- /* Find largest page shift we can use to cover buffers */
- for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
- if ((1ULL << *shift) & mask)
- break;
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
- buffer_list[0].addr &= ~0ull << *shift;
-
- *npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- *npages += (buffer_list[i].size +
- (1ULL << *shift) - 1) >> *shift;
-
- if (!*npages)
- return -EINVAL;
-
- *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
- if (!*page_list)
- return -ENOMEM;
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
- ++j)
- (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
- ((u64) j << *shift));
-
- PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, (unsigned long long)*iova_start,
- (unsigned long long)mask, *shift, (unsigned long long)*total_size,
- *npages);
-
- return 0;
-
-}
-
-int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
- struct ib_pd *pd, struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
-
- struct c4iw_mr mh, *mhp;
- struct c4iw_pd *php;
- struct c4iw_dev *rhp;
- __be64 *page_list = NULL;
- int shift = 0;
- u64 total_size;
- int npages;
- int ret;
-
- PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
- /* There can be no memory windows */
- if (atomic_read(&mr->usecnt))
- return -EINVAL;
-
- mhp = to_c4iw_mr(mr);
- rhp = mhp->rhp;
- php = to_c4iw_pd(mr->pd);
-
- /* make sure we are on the same adapter */
- if (rhp != php->rhp)
- return -EINVAL;
-
- memcpy(&mh, mhp, sizeof *mhp);
-
- if (mr_rereg_mask & IB_MR_REREG_PD)
- php = to_c4iw_pd(pd);
- if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
- mh.attr.perms = c4iw_ib_to_tpt_access(acc);
- mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
- IB_ACCESS_MW_BIND;
- }
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- ret = build_phys_page_list(buffer_list, num_phys_buf,
- iova_start,
- &total_size, &npages,
- &shift, &page_list);
- if (ret)
- return ret;
- }
-
- if (mr_exceeds_hw_limits(rhp, total_size)) {
- kfree(page_list);
- return -EINVAL;
- }
-
- ret = reregister_mem(rhp, php, &mh, shift, npages);
- kfree(page_list);
- if (ret)
- return ret;
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mhp->attr.pdid = php->pdid;
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- mhp->attr.zbva = 0;
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- }
-
- return 0;
-}
-
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- __be64 *page_list;
- int shift;
- u64 total_size;
- int npages;
- struct c4iw_dev *rhp;
- struct c4iw_pd *php;
- struct c4iw_mr *mhp;
- int ret;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
- php = to_c4iw_pd(pd);
- rhp = php->rhp;
-
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
- ret = -EINVAL;
- goto err;
- }
-
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
- ret = -EINVAL;
- goto err;
- }
-
- ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
- &total_size, &npages, &shift,
- &page_list);
- if (ret)
- goto err;
-
- if (mr_exceeds_hw_limits(rhp, total_size)) {
- kfree(page_list);
- ret = -EINVAL;
- goto err;
- }
-
- ret = alloc_pbl(mhp, npages);
- if (ret) {
- kfree(page_list);
- goto err;
- }
-
- ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
- npages);
- kfree(page_list);
- if (ret)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
-
- mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
-
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- ret = register_mem(rhp, php, mhp, shift);
- if (ret)
- goto err_pbl;
-
- return &mhp->ibmr;
-
-err_pbl:
- c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
- mhp->attr.pbl_size << 3);
-
-err:
- kfree(mhp);
- return ERR_PTR(ret);
-
-}
-
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
{
struct c4iw_dev *rhp;
@@ -863,6 +615,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
u32 mmid;
u32 stag = 0;
int ret = 0;
+ int length = roundup(max_num_sg * sizeof(u64), 32);
if (mr_type != IB_MR_TYPE_MEM_REG ||
max_num_sg > t4_max_fr_depth(use_dsgl))
@@ -876,6 +629,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
goto err;
}
+ mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev,
+ length, &mhp->mpl_addr, GFP_KERNEL);
+ if (!mhp->mpl) {
+ ret = -ENOMEM;
+ goto err_mpl;
+ }
+ mhp->max_mpl_len = length;
+
mhp->rhp = rhp;
ret = alloc_pbl(mhp, max_num_sg);
if (ret)
@@ -905,54 +666,35 @@ err2:
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
err1:
+ dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
+ mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
+err_mpl:
kfree(mhp);
err:
return ERR_PTR(ret);
}
-struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
- int page_list_len)
+static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
{
- struct c4iw_fr_page_list *c4pl;
- struct c4iw_dev *dev = to_c4iw_dev(device);
- dma_addr_t dma_addr;
- int pll_len = roundup(page_list_len * sizeof(u64), 32);
-
- c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL);
- if (!c4pl)
- return ERR_PTR(-ENOMEM);
+ struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
- c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev,
- pll_len, &dma_addr,
- GFP_KERNEL);
- if (!c4pl->ibpl.page_list) {
- kfree(c4pl);
- return ERR_PTR(-ENOMEM);
- }
- dma_unmap_addr_set(c4pl, mapping, dma_addr);
- c4pl->dma_addr = dma_addr;
- c4pl->dev = dev;
- c4pl->pll_len = pll_len;
+ if (unlikely(mhp->mpl_len == mhp->max_mpl_len))
+ return -ENOMEM;
- PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
- __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
- &c4pl->dma_addr);
+ mhp->mpl[mhp->mpl_len++] = addr;
- return &c4pl->ibpl;
+ return 0;
}
-void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
+int c4iw_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
+ struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
- PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
- __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
- &c4pl->dma_addr);
+ mhp->mpl_len = 0;
- dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev,
- c4pl->pll_len,
- c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
- kfree(c4pl);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page);
}
int c4iw_dereg_mr(struct ib_mr *ib_mr)
@@ -962,14 +704,14 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
u32 mmid;
PDBG("%s ib_mr %p\n", __func__, ib_mr);
- /* There can be no memory windows */
- if (atomic_read(&ib_mr->usecnt))
- return -EINVAL;
mhp = to_c4iw_mr(ib_mr);
rhp = mhp->rhp;
mmid = mhp->attr.stag >> 8;
remove_handle(rhp, &rhp->mmidr, mmid);
+ if (mhp->mpl)
+ dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
+ mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
if (mhp->attr.pbl_size)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 7746113..ec04272 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (addr >= rdev->oc_mw_pa)
vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
else {
- if (is_t5(rdev->lldi.adapter_type))
+ if (!is_t4(rdev->lldi.adapter_type))
vma->vm_page_prot =
t4_pgprot_wc(vma->vm_page_prot);
else
@@ -549,16 +549,12 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.resize_cq = c4iw_resize_cq;
dev->ibdev.poll_cq = c4iw_poll_cq;
dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
- dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
- dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
dev->ibdev.dereg_mr = c4iw_dereg_mr;
dev->ibdev.alloc_mw = c4iw_alloc_mw;
- dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
dev->ibdev.alloc_mr = c4iw_alloc_mr;
- dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
- dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
+ dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
dev->ibdev.attach_mcast = c4iw_multicast_attach;
dev->ibdev.detach_mcast = c4iw_multicast_detach;
dev->ibdev.process_mad = c4iw_process_mad;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 6517e12..e99345e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -528,8 +528,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
if (wr->num_sge > T4_MAX_SEND_SGE)
return -EINVAL;
wqe->write.r2 = 0;
- wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
- wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+ wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
+ wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
if (wr->num_sge) {
if (wr->send_flags & IB_SEND_INLINE) {
ret = build_immd(sq, wqe->write.u.immd_src, wr,
@@ -566,10 +566,10 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
if (wr->num_sge > 1)
return -EINVAL;
if (wr->num_sge) {
- wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey);
- wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr
+ wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
+ wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
>> 32));
- wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr);
+ wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr);
wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr
@@ -605,47 +605,41 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
return 0;
}
-static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
- struct ib_send_wr *wr, u8 *len16, u8 t5dev)
+static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
+ struct ib_reg_wr *wr, u8 *len16, u8 t5dev)
{
-
+ struct c4iw_mr *mhp = to_c4iw_mr(wr->mr);
struct fw_ri_immd *imdp;
__be64 *p;
int i;
- int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
+ int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
int rem;
- if (wr->wr.fast_reg.page_list_len >
- t4_max_fr_depth(use_dsgl))
+ if (mhp->mpl_len > t4_max_fr_depth(use_dsgl))
return -EINVAL;
wqe->fr.qpbinde_to_dcacpu = 0;
- wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12;
+ wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12;
wqe->fr.addr_type = FW_RI_VA_BASED_TO;
- wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags);
+ wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access);
wqe->fr.len_hi = 0;
- wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length);
- wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
- wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
- wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start &
+ wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
+ wqe->fr.stag = cpu_to_be32(wr->key);
+ wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
+ wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
0xffffffff);
if (t5dev && use_dsgl && (pbllen > max_fr_immd)) {
- struct c4iw_fr_page_list *c4pl =
- to_c4iw_fr_page_list(wr->wr.fast_reg.page_list);
struct fw_ri_dsgl *sglp;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- wr->wr.fast_reg.page_list->page_list[i] = (__force u64)
- cpu_to_be64((u64)
- wr->wr.fast_reg.page_list->page_list[i]);
- }
+ for (i = 0; i < mhp->mpl_len; i++)
+ mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]);
sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
sglp->op = FW_RI_DATA_DSGL;
sglp->r1 = 0;
sglp->nsge = cpu_to_be16(1);
- sglp->addr0 = cpu_to_be64(c4pl->dma_addr);
+ sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
sglp->len0 = cpu_to_be32(pbllen);
*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
@@ -657,9 +651,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
imdp->immdlen = cpu_to_be32(pbllen);
p = (__be64 *)(imdp + 1);
rem = pbllen;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- *p = cpu_to_be64(
- (u64)wr->wr.fast_reg.page_list->page_list[i]);
+ for (i = 0; i < mhp->mpl_len; i++) {
+ *p = cpu_to_be64((u64)mhp->mpl[i]);
rem -= sizeof(*p);
if (++p == (__be64 *)&sq->queue[sq->size])
p = (__be64 *)sq->queue;
@@ -712,8 +705,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
spin_lock_irqsave(&qhp->rhp->lock, flags);
spin_lock(&qhp->lock);
if (qhp->rhp->db_state == NORMAL)
- t4_ring_sq_db(&qhp->wq, inc,
- is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ t4_ring_sq_db(&qhp->wq, inc, NULL);
else {
add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
qhp->wq.sq.wq_pidx_inc += inc;
@@ -730,8 +722,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
spin_lock_irqsave(&qhp->rhp->lock, flags);
spin_lock(&qhp->lock);
if (qhp->rhp->db_state == NORMAL)
- t4_ring_rq_db(&qhp->wq, inc,
- is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ t4_ring_rq_db(&qhp->wq, inc, NULL);
else {
add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
qhp->wq.rq.wq_pidx_inc += inc;
@@ -813,13 +804,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (!qhp->wq.sq.oldest_read)
qhp->wq.sq.oldest_read = swsqe;
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
fw_opcode = FW_RI_FR_NSMR_WR;
swsqe->opcode = FW_RI_FAST_REGISTER;
- err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16,
- is_t5(
- qhp->rhp->rdev.lldi.adapter_type) ?
- 1 : 0);
+ err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16,
+ is_t5(
+ qhp->rhp->rdev.lldi.adapter_type) ?
+ 1 : 0);
break;
case IB_WR_LOCAL_INV:
if (wr->send_flags & IB_SEND_FENCE)
@@ -860,8 +851,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
}
if (!qhp->rhp->rdev.status_page->db_off) {
- t4_ring_sq_db(&qhp->wq, idx,
- is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ t4_ring_sq_db(&qhp->wq, idx, wqe);
spin_unlock_irqrestore(&qhp->lock, flag);
} else {
spin_unlock_irqrestore(&qhp->lock, flag);
@@ -934,8 +924,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
num_wrs--;
}
if (!qhp->rhp->rdev.status_page->db_off) {
- t4_ring_rq_db(&qhp->wq, idx,
- is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ t4_ring_rq_db(&qhp->wq, idx, wqe);
spin_unlock_irqrestore(&qhp->lock, flag);
} else {
spin_unlock_irqrestore(&qhp->lock, flag);
@@ -944,11 +933,6 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return err;
}
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
-{
- return -ENOSYS;
-}
-
static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
u8 *ecode)
{
@@ -1875,7 +1859,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attrs.rq_db_inc = attr->rq_psn;
mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
- if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
+ if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
(mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
return -EINVAL;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 274a7ab..6126bbe 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -455,8 +455,7 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
}
}
-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
- union t4_wr *wqe)
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
{
/* Flush host queue memory writes. */
@@ -482,7 +481,7 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db);
}
-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
union t4_recv_wr *wqe)
{
@@ -700,4 +699,11 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
struct t4_dev_status_page {
u8 db_off;
+ u8 pad1;
+ u16 pad2;
+ u32 pad3;
+ u64 qp_start;
+ u64 qp_size;
+ u64 cq_start;
+ u64 cq_size;
};
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index cbd0ce1..295f422 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -32,7 +32,7 @@
#ifndef __C4IW_USER_H__
#define __C4IW_USER_H__
-#define C4IW_UVERBS_ABI_VERSION 2
+#define C4IW_UVERBS_ABI_VERSION 3
/*
* Make sure that all structs defined in this file remain laid out so
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 1688a17..105246f 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -76,7 +76,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
struct mlx4_dev *dev = ibdev->dev;
int is_mcast = 0;
struct in6_addr in6;
- u16 vlan_tag;
+ u16 vlan_tag = 0xffff;
+ union ib_gid sgid;
+ struct ib_gid_attr gid_attr;
+ int ret;
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6)) {
@@ -85,12 +88,23 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
} else {
memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
}
- vlan_tag = ah_attr->vlan_id;
+ ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
+ ah_attr->grh.sgid_index, &sgid, &gid_attr);
+ if (ret)
+ return ERR_PTR(ret);
+ eth_zero_addr(ah->av.eth.s_mac);
+ if (gid_attr.ndev) {
+ if (is_vlan_dev(gid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
+ memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gid_attr.ndev);
+ }
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+ ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5fd49f9..9f8b516 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -811,14 +811,11 @@ repoll:
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX4_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX4_OPCODE_LSO:
wc->opcode = IB_WC_LSO;
break;
case MLX4_OPCODE_FMR:
- wc->opcode = IB_WC_FAST_REG_MR;
+ wc->opcode = IB_WC_REG_MR;
break;
case MLX4_OPCODE_LOCAL_INVAL:
wc->opcode = IB_WC_LOCAL_INV;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 1cd75ff..d68f506 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -40,6 +40,7 @@
#include <linux/gfp.h>
#include <rdma/ib_pma.h>
+#include <linux/mlx4/driver.h>
#include "mlx4_ib.h"
enum {
@@ -457,7 +458,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
struct ib_grh *grh, struct ib_mad *mad)
{
struct ib_sge list;
- struct ib_send_wr wr, *bad_wr;
+ struct ib_ud_wr wr;
+ struct ib_send_wr *bad_wr;
struct mlx4_ib_demux_pv_ctx *tun_ctx;
struct mlx4_ib_demux_pv_qp *tun_qp;
struct mlx4_rcv_tunnel_mad *tun_mad;
@@ -582,18 +584,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
list.length = sizeof (struct mlx4_rcv_tunnel_mad);
list.lkey = tun_ctx->pd->local_dma_lkey;
- wr.wr.ud.ah = ah;
- wr.wr.ud.port_num = port;
- wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
- wr.wr.ud.remote_qpn = dqpn;
- wr.next = NULL;
- wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IB_WR_SEND;
- wr.send_flags = IB_SEND_SIGNALED;
-
- ret = ib_post_send(src_qp, &wr, &bad_wr);
+ wr.ah = ah;
+ wr.port_num = port;
+ wr.remote_qkey = IB_QP_SET_QKEY;
+ wr.remote_qpn = dqpn;
+ wr.wr.next = NULL;
+ wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
+ wr.wr.sg_list = &list;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = IB_WR_SEND;
+ wr.wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
out:
if (ret)
ib_destroy_ah(ah);
@@ -605,8 +607,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
struct ib_mad *mad)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- int err;
- int slave;
+ int err, other_port;
+ int slave = -1;
u8 *slave_id;
int is_eth = 0;
@@ -624,7 +626,17 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
return -EINVAL;
}
- if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+ err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave);
+ if (err && mlx4_is_mf_bonded(dev->dev)) {
+ other_port = (port == 1) ? 2 : 1;
+ err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave);
+ if (!err) {
+ port = other_port;
+ pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
+ slave, grh->dgid.raw, port, other_port);
+ }
+ }
+ if (err) {
mlx4_ib_warn(ibdev, "failed matching grh\n");
return -ENOENT;
}
@@ -805,17 +817,48 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-static void edit_counter(struct mlx4_counter *cnt,
- struct ib_pma_portcounters *pma_cnt)
+static void edit_counter(struct mlx4_counter *cnt, void *counters,
+ __be16 attr_id)
{
- ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
- (be64_to_cpu(cnt->tx_bytes) >> 2));
- ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
- (be64_to_cpu(cnt->rx_bytes) >> 2));
- ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
- be64_to_cpu(cnt->tx_frames));
- ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
- be64_to_cpu(cnt->rx_frames));
+ switch (attr_id) {
+ case IB_PMA_PORT_COUNTERS:
+ {
+ struct ib_pma_portcounters *pma_cnt =
+ (struct ib_pma_portcounters *)counters;
+
+ ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
+ (be64_to_cpu(cnt->tx_bytes) >> 2));
+ ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
+ (be64_to_cpu(cnt->rx_bytes) >> 2));
+ ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
+ be64_to_cpu(cnt->tx_frames));
+ ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
+ be64_to_cpu(cnt->rx_frames));
+ break;
+ }
+ case IB_PMA_PORT_COUNTERS_EXT:
+ {
+ struct ib_pma_portcounters_ext *pma_cnt_ext =
+ (struct ib_pma_portcounters_ext *)counters;
+
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2);
+ pma_cnt_ext->port_xmit_packets = cnt->tx_frames;
+ pma_cnt_ext->port_rcv_packets = cnt->rx_frames;
+ break;
+ }
+ }
+}
+
+static int iboe_process_mad_port_info(void *out_mad)
+{
+ struct ib_class_port_info cpi = {};
+
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy(out_mad, &cpi, sizeof(cpi));
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -824,23 +867,38 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
{
struct mlx4_counter counter_stats;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- int err;
+ struct counter_index *tmp_counter;
+ int err = IB_MAD_RESULT_FAILURE, stats_avail = 0;
if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
return -EINVAL;
+ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)
+ return iboe_process_mad_port_info((void *)(out_mad->data + 40));
+
memset(&counter_stats, 0, sizeof(counter_stats));
- err = mlx4_get_counter_stats(dev->dev,
- dev->counters[port_num - 1].index,
- &counter_stats, 0);
- if (err)
- err = IB_MAD_RESULT_FAILURE;
- else {
+ mutex_lock(&dev->counters_table[port_num - 1].mutex);
+ list_for_each_entry(tmp_counter,
+ &dev->counters_table[port_num - 1].counters_list,
+ list) {
+ err = mlx4_get_counter_stats(dev->dev,
+ tmp_counter->index,
+ &counter_stats, 0);
+ if (err) {
+ err = IB_MAD_RESULT_FAILURE;
+ stats_avail = 0;
+ break;
+ }
+ stats_avail = 1;
+ }
+ mutex_unlock(&dev->counters_table[port_num - 1].mutex);
+ if (stats_avail) {
memset(out_mad->data, 0, sizeof out_mad->data);
switch (counter_stats.counter_mode & 0xf) {
case 0:
edit_counter(&counter_stats,
- (void *)(out_mad->data + 40));
+ (void *)(out_mad->data + 40),
+ in_mad->mad_hdr.attr_id);
err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
break;
default:
@@ -871,8 +929,10 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
*/
if (link == IB_LINK_LAYER_INFINIBAND) {
if (mlx4_is_slave(dev->dev) &&
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
- in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS)
+ (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+ (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
+ in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
+ in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
in_grh, in_mad, out_mad);
@@ -1172,10 +1232,11 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index,
u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
- u8 *s_mac, struct ib_mad *mad)
+ u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
{
struct ib_sge list;
- struct ib_send_wr wr, *bad_wr;
+ struct ib_ud_wr wr;
+ struct ib_send_wr *bad_wr;
struct mlx4_ib_demux_pv_ctx *sqp_ctx;
struct mlx4_ib_demux_pv_qp *sqp;
struct mlx4_mad_snd_buf *sqp_mad;
@@ -1246,22 +1307,25 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
list.length = sizeof (struct mlx4_mad_snd_buf);
list.lkey = sqp_ctx->pd->local_dma_lkey;
- wr.wr.ud.ah = ah;
- wr.wr.ud.port_num = port;
- wr.wr.ud.pkey_index = wire_pkey_ix;
- wr.wr.ud.remote_qkey = qkey;
- wr.wr.ud.remote_qpn = remote_qpn;
- wr.next = NULL;
- wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IB_WR_SEND;
- wr.send_flags = IB_SEND_SIGNALED;
+ wr.ah = ah;
+ wr.port_num = port;
+ wr.pkey_index = wire_pkey_ix;
+ wr.remote_qkey = qkey;
+ wr.remote_qpn = remote_qpn;
+ wr.wr.next = NULL;
+ wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
+ wr.wr.sg_list = &list;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = IB_WR_SEND;
+ wr.wr.send_flags = IB_SEND_SIGNALED;
if (s_mac)
memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+ if (vlan_id < 0x1000)
+ vlan_id |= (attr->sl & 7) << 13;
+ to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
- ret = ib_post_send(send_qp, &wr, &bad_wr);
+ ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
out:
if (ret)
ib_destroy_ah(ah);
@@ -1295,6 +1359,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
u8 *slave_id;
int slave;
int port;
+ u16 vlan_id;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1383,10 +1448,10 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
- ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
+ vlan_id = be16_to_cpu(tunnel->hdr.vlan);
/* if slave have default vlan use it */
mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
- &ah_attr.vlan_id, &ah_attr.sl);
+ &vlan_id, &ah_attr.sl);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1394,7 +1459,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
be16_to_cpu(tunnel->hdr.pkey_index),
be32_to_cpu(tunnel->hdr.remote_qpn),
be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, wc->smac, &tunnel->mad);
+ &ah_attr, wc->smac, vlan_id, &tunnel->mad);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index efecdf0..1c7ab6c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
return dev;
}
-static int mlx4_ib_update_gids(struct gid_entry *gids,
- struct mlx4_ib_dev *ibdev,
- u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return err;
}
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ struct {
+ union ib_gid gid;
+ __be32 rsrvd1[2];
+ __be16 rsrvd2;
+ u8 type;
+ u8 version;
+ __be32 rsrvd3;
+ } *gid_tbl;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+
+ gid_tbl = mailbox->buf;
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+ memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
+ if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ gid_tbl[i].version = 2;
+ if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
+ gid_tbl[i].type = 1;
+ else
+ memset(&gid_tbl[i].gid, 0, 12);
+ }
+ }
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (mlx4_is_bonded(dev))
+ err += mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+
+ return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+}
+
static int mlx4_ib_add_gid(struct ib_device *device,
u8 port_num,
unsigned int index,
@@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
port_gid_table = &iboe->gids[port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
- if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+ (port_gid_table->gids[i].gid_type == attr->gid_type)) {
found = i;
break;
}
@@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
} else {
*context = port_gid_table->gids[free].ctx;
memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ port_gid_table->gids[free].gid_type = attr->gid_type;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
@@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
if (!gids) {
ret = -ENOMEM;
} else {
- for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ gids[i].gid_type = port_gid_table->gids[i].gid_type;
+ }
}
}
spin_unlock_bh(&iboe->lock);
@@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
int i;
int ret;
unsigned long flags;
+ struct ib_gid_attr attr;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
@@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
return index;
- ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid);
+ ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
if (ret)
return ret;
+ if (attr.ndev)
+ dev_put(attr.ndev);
+
if (!memcmp(&gid, &zgid, sizeof(gid)))
return -EINVAL;
@@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
port_gid_table = &iboe->gids[port_num - 1];
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
- if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
+ attr.gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
@@ -442,6 +508,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
+ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = dev->dev->persist->pdev->device;
@@ -454,7 +522,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
- props->max_sge_rd = props->max_sge;
+ props->max_sge_rd = MLX4_MAX_SGE_RD;
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt;
@@ -754,7 +822,7 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
if (!rdma_cap_roce_gid_table(ibdev, port))
return -ENODEV;
- ret = ib_get_cached_gid(ibdev, port, index, gid);
+ ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
if (ret == -EAGAIN) {
memcpy(gid, &zgid, sizeof(*gid));
return 0;
@@ -1247,6 +1315,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
return 0;
}
+static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
+ struct mlx4_ib_counters *ctr_table)
+{
+ struct counter_index *counter, *tmp_count;
+
+ mutex_lock(&ctr_table->mutex);
+ list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
+ list) {
+ if (counter->allocated)
+ mlx4_counter_free(ibdev->dev, counter->index);
+ list_del(&counter->list);
+ kfree(counter);
+ }
+ mutex_unlock(&ctr_table->mutex);
+}
+
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid)
{
@@ -2101,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
err = mlx4_ib_query_port(ibdev, port_num, &attr);
@@ -2110,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+ if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
- else
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ } else {
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ }
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
@@ -2131,6 +2221,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int num_req_counters;
int allocated;
u32 counter_index;
+ struct counter_index *new_counter_index = NULL;
pr_info_once("%s", mlx4_ib_version);
@@ -2247,8 +2338,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
- ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
- ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
+ ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
@@ -2265,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
- ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
ibdev->ib_dev.uverbs_cmd_mask |=
@@ -2293,7 +2382,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ);
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
mlx4_ib_alloc_eqs(dev, ibdev);
@@ -2302,6 +2392,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (init_node_data(ibdev))
goto err_map;
+ for (i = 0; i < ibdev->num_ports; ++i) {
+ mutex_init(&ibdev->counters_table[i].mutex);
+ INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+ }
+
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
for (i = 0; i < num_req_counters; ++i) {
mutex_init(&ibdev->qp1_proxy_lock[i]);
@@ -2320,15 +2415,34 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
counter_index = mlx4_get_default_counter_index(dev,
i + 1);
}
- ibdev->counters[i].index = counter_index;
- ibdev->counters[i].allocated = allocated;
+ new_counter_index = kmalloc(sizeof(*new_counter_index),
+ GFP_KERNEL);
+ if (!new_counter_index) {
+ if (allocated)
+ mlx4_counter_free(ibdev->dev, counter_index);
+ goto err_counter;
+ }
+ new_counter_index->index = counter_index;
+ new_counter_index->allocated = allocated;
+ list_add_tail(&new_counter_index->list,
+ &ibdev->counters_table[i].counters_list);
+ ibdev->counters_table[i].default_counter = counter_index;
pr_info("counter index %d for port %d allocated %d\n",
counter_index, i + 1, allocated);
}
if (mlx4_is_bonded(dev))
for (i = 1; i < ibdev->num_ports ; ++i) {
- ibdev->counters[i].index = ibdev->counters[0].index;
- ibdev->counters[i].allocated = 0;
+ new_counter_index =
+ kmalloc(sizeof(struct counter_index),
+ GFP_KERNEL);
+ if (!new_counter_index)
+ goto err_counter;
+ new_counter_index->index = counter_index;
+ new_counter_index->allocated = 0;
+ list_add_tail(&new_counter_index->list,
+ &ibdev->counters_table[i].counters_list);
+ ibdev->counters_table[i].default_counter =
+ counter_index;
}
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
@@ -2380,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
@@ -2389,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
}
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err) {
+ goto err_notif;
+ }
+ }
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -2437,12 +2558,9 @@ err_steer_qp_release:
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
err_counter:
- for (i = 0; i < ibdev->num_ports; ++i) {
- if (ibdev->counters[i].index != -1 &&
- ibdev->counters[i].allocated)
- mlx4_counter_free(ibdev->dev,
- ibdev->counters[i].index);
- }
+ for (i = 0; i < ibdev->num_ports; ++i)
+ mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
+
err_map:
iounmap(ibdev->uar_map);
@@ -2546,9 +2664,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
- if (ibdev->counters[p].index != -1 &&
- ibdev->counters[p].allocated)
- mlx4_counter_free(ibdev->dev, ibdev->counters[p].index);
+ mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
+
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
mlx4_CLOSE_PORT(dev, p);
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 2d5bccd..99451d8 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -222,7 +222,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
spin_unlock_irqrestore(&dev->sm_lock, flags);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
- &ah_attr, NULL, mad);
+ &ah_attr, NULL, 0xffff, mad);
}
static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1e7b23b..52ce7b0 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -129,10 +129,17 @@ struct mlx4_ib_cq {
struct list_head recv_qp_list;
};
+#define MLX4_MR_PAGES_ALIGN 0x40
+
struct mlx4_ib_mr {
struct ib_mr ibmr;
+ __be64 *pages;
+ dma_addr_t page_map;
+ u32 npages;
+ u32 max_pages;
struct mlx4_mr mmr;
struct ib_umem *umem;
+ void *pages_alloc;
};
struct mlx4_ib_mw {
@@ -140,12 +147,6 @@ struct mlx4_ib_mw {
struct mlx4_mw mmw;
};
-struct mlx4_ib_fast_reg_page_list {
- struct ib_fast_reg_page_list ibfrpl;
- __be64 *mapped_page_list;
- dma_addr_t map;
-};
-
struct mlx4_ib_fmr {
struct ib_fmr ibfmr;
struct mlx4_fmr mfmr;
@@ -176,11 +177,18 @@ struct mlx4_ib_wq {
unsigned tail;
};
+enum {
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+ /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
@@ -320,6 +328,7 @@ struct mlx4_ib_qp {
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
+ struct counter_index *counter_index;
};
struct mlx4_ib_srq {
@@ -476,6 +485,7 @@ struct gid_cache_context {
struct gid_entry {
union ib_gid gid;
+ enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
};
@@ -528,10 +538,17 @@ struct mlx4_ib_iov_port {
};
struct counter_index {
+ struct list_head list;
u32 index;
u8 allocated;
};
+struct mlx4_ib_counters {
+ struct list_head counters_list;
+ struct mutex mutex; /* mutex for accessing counters list */
+ u32 default_counter;
+};
+
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@@ -550,7 +567,7 @@ struct mlx4_ib_dev {
struct mutex cap_mask_mutex;
bool ib_active;
struct mlx4_ib_iboe iboe;
- struct counter_index counters[MLX4_MAX_PORTS];
+ struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS];
int *eq_table;
struct kobject *iov_parent;
struct kobject *ports_parent;
@@ -638,11 +655,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx4_ib_mw, ibmw);
}
-static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
-{
- return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
-}
-
static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -700,16 +712,13 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
- int page_list_len);
-void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
@@ -813,7 +822,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
- struct ib_mad *mad);
+ u16 vlan_id, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 2542fd3..242b94e 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -59,7 +59,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
struct mlx4_ib_mr *mr;
int err;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -140,7 +140,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err;
int n;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -271,11 +271,59 @@ release_mpt_entry:
return err;
}
+static int
+mlx4_alloc_priv_pages(struct ib_device *device,
+ struct mlx4_ib_mr *mr,
+ int max_pages)
+{
+ int size = max_pages * sizeof(u64);
+ int add_size;
+ int ret;
+
+ add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+
+ mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
+ if (!mr->pages_alloc)
+ return -ENOMEM;
+
+ mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
+
+ mr->page_map = dma_map_single(device->dma_device, mr->pages,
+ size, DMA_TO_DEVICE);
+
+ if (dma_mapping_error(device->dma_device, mr->page_map)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ return 0;
+err:
+ kfree(mr->pages_alloc);
+
+ return ret;
+}
+
+static void
+mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
+{
+ if (mr->pages) {
+ struct ib_device *device = mr->ibmr.device;
+ int size = mr->max_pages * sizeof(u64);
+
+ dma_unmap_single(device->dma_device, mr->page_map,
+ size, DMA_TO_DEVICE);
+ kfree(mr->pages_alloc);
+ mr->pages = NULL;
+ }
+}
+
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
int ret;
+ mlx4_free_priv_pages(mr);
+
ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (ret)
return ret;
@@ -318,28 +366,6 @@ err_free:
return ERR_PTR(err);
}
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct ib_send_wr wr;
- struct ib_send_wr *bad_wr;
- int ret;
-
- memset(&wr, 0, sizeof(wr));
- wr.opcode = IB_WR_BIND_MW;
- wr.wr_id = mw_bind->wr_id;
- wr.send_flags = mw_bind->send_flags;
- wr.wr.bind_mw.mw = mw;
- wr.wr.bind_mw.bind_info = mw_bind->bind_info;
- wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey);
-
- ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
- if (!ret)
- mw->rkey = wr.wr.bind_mw.rkey;
-
- return ret;
-}
-
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
struct mlx4_ib_mw *mw = to_mmw(ibmw);
@@ -362,7 +388,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
max_num_sg > MLX4_MAX_FAST_REG_PAGES)
return ERR_PTR(-EINVAL);
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -371,71 +397,30 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
if (err)
goto err_free;
+ err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
+ if (err)
+ goto err_free_mr;
+
+ mr->max_pages = max_num_sg;
+
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
- goto err_mr;
+ goto err_free_pl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->umem = NULL;
return &mr->ibmr;
-err_mr:
+err_free_pl:
+ mlx4_free_priv_pages(mr);
+err_free_mr:
(void) mlx4_mr_free(dev->dev, &mr->mmr);
-
err_free:
kfree(mr);
return ERR_PTR(err);
}
-struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
- int page_list_len)
-{
- struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct mlx4_ib_fast_reg_page_list *mfrpl;
- int size = page_list_len * sizeof (u64);
-
- if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
- return ERR_PTR(-EINVAL);
-
- mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
- if (!mfrpl)
- return ERR_PTR(-ENOMEM);
-
- mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
- if (!mfrpl->ibfrpl.page_list)
- goto err_free;
-
- mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
- pdev->dev,
- size, &mfrpl->map,
- GFP_KERNEL);
- if (!mfrpl->mapped_page_list)
- goto err_free;
-
- WARN_ON(mfrpl->map & 0x3f);
-
- return &mfrpl->ibfrpl;
-
-err_free:
- kfree(mfrpl->ibfrpl.page_list);
- kfree(mfrpl);
- return ERR_PTR(-ENOMEM);
-}
-
-void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
- struct mlx4_ib_dev *dev = to_mdev(page_list->device);
- struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
- int size = page_list->max_page_list_len * sizeof (u64);
-
- dma_free_coherent(&dev->dev->persist->pdev->dev, size,
- mfrpl->mapped_page_list,
- mfrpl->map);
- kfree(mfrpl->ibfrpl.page_list);
- kfree(mfrpl);
-}
-
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
struct ib_fmr_attr *fmr_attr)
{
@@ -528,3 +513,37 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
return err;
}
+
+static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct mlx4_ib_mr *mr = to_mmr(ibmr);
+
+ if (unlikely(mr->npages == mr->max_pages))
+ return -ENOMEM;
+
+ mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);
+
+ return 0;
+}
+
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
+{
+ struct mlx4_ib_mr *mr = to_mmr(ibmr);
+ int rc;
+
+ mr->npages = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
+ sizeof(u64) * mr->max_pages,
+ DMA_TO_DEVICE);
+
+ rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page);
+
+ ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
+ sizeof(u64) * mr->max_pages,
+ DMA_TO_DEVICE);
+
+ return rc;
+}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4ad9be3..fd97534 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -32,8 +32,11 @@
*/
#include <linux/log2.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
@@ -84,6 +87,7 @@ struct mlx4_ib_sqp {
u32 send_psn;
struct ib_ud_header ud_header;
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
};
enum {
@@ -111,10 +115,9 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
[IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
[IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
- [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
+ [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
- [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
};
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -153,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
}
- return proxy_sqp;
+ if (proxy_sqp)
+ return 1;
+
+ return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
}
/* used for INIT/CLOSE port logic */
@@ -617,6 +623,18 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
return 0;
}
+static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_qp *qp)
+{
+ mutex_lock(&dev->counters_table[qp->port - 1].mutex);
+ mlx4_counter_free(dev->dev, qp->counter_index->index);
+ list_del(&qp->counter_index->list);
+ mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
+
+ kfree(qp->counter_index);
+ qp->counter_index = NULL;
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
@@ -746,9 +764,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
} else {
qp->sq_no_prefetch = 0;
- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
- qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
-
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
@@ -786,8 +801,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+ qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
+ if (!qp->sq.wrid)
+ qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+ gfp, PAGE_KERNEL);
+ qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
+ if (!qp->rq.wrid)
+ qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+ gfp, PAGE_KERNEL);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
@@ -822,6 +845,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_proxy;
}
+ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
if (err)
goto err_qpn;
@@ -874,8 +900,8 @@ err_wrid:
if (qp_has_rq(init_attr))
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
- kfree(qp->sq.wrid);
- kfree(qp->rq.wrid);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->rq.wrid);
}
err_mtt:
@@ -1050,8 +1076,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
&qp->db);
ib_umem_release(qp->umem);
} else {
- kfree(qp->sq.wrid);
- kfree(qp->rq.wrid);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->rq.wrid);
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
free_proxy_bufs(&dev->ib_dev, qp);
@@ -1080,12 +1106,13 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mlx4_ib_qp *qp = NULL;
int err;
+ int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
gfp_t gfp;
@@ -1100,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
@@ -1108,13 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
- if (init_attr->create_flags &&
- (udata ||
- ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
- init_attr->qp_type != IB_QPT_UD) ||
- ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
- init_attr->qp_type > IB_QPT_GSI)))
- return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags) {
+ if (udata && init_attr->create_flags & ~(sup_u_create_flags))
+ return ERR_PTR(-EINVAL);
+
+ if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
+ init_attr->qp_type != IB_QPT_UD) ||
+ (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
+ init_attr->qp_type > IB_QPT_GSI) ||
+ (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
+ init_attr->qp_type != IB_QPT_GSI))
+ return ERR_PTR(-EINVAL);
+ }
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
@@ -1151,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ int sqpn;
+
/* Userspace is not allowed to create special QPs: */
if (udata)
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+ int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+
+ if (res)
+ return ERR_PTR(res);
+ } else {
+ sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
+ }
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- get_sqp_num(to_mdev(pd->device), init_attr),
+ sqpn,
&qp, gfp);
if (err)
return ERR_PTR(err);
qp->port = init_attr->port_num;
- qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
+ init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
break;
}
default:
@@ -1174,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return &qp->ibqp;
}
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata) {
+ struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+ struct ib_qp *ibqp;
+ struct mlx4_ib_dev *dev = to_mdev(device);
+
+ ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+
+ if (!IS_ERR(ibqp) &&
+ (init_attr->qp_type == IB_QPT_GSI) &&
+ !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
+ struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
+
+ if (is_eth &&
+ dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+ sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
+
+ if (IS_ERR(sqp->roce_v2_gsi)) {
+ pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+ sqp->roce_v2_gsi = NULL;
+ } else {
+ sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
+ sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+ }
+
+ init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+ }
+ }
+ return ibqp;
+}
+
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1189,6 +1269,9 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
}
+ if (mqp->counter_index)
+ mlx4_ib_free_qp_counter(dev, mqp);
+
pd = get_pd(mqp);
destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
@@ -1200,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
return 0;
}
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(qp);
+
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+
+ if (sqp->roce_v2_gsi)
+ ib_destroy_qp(sqp->roce_v2_gsi);
+ }
+
+ return _mlx4_ib_destroy_qp(qp);
+}
+
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
@@ -1391,11 +1488,12 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
enum ib_qp_attr_mask qp_attr_mask,
struct mlx4_ib_qp *mqp,
- struct mlx4_qp_path *path, u8 port)
+ struct mlx4_qp_path *path, u8 port,
+ u16 vlan_id, u8 *smac)
{
return _mlx4_set_path(dev, &qp->ah_attr,
- mlx4_mac_to_u64((u8 *)qp->smac),
- (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
+ mlx4_mac_to_u64(smac),
+ vlan_id,
path, &mqp->pri, port);
}
@@ -1406,9 +1504,8 @@ static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
struct mlx4_qp_path *path, u8 port)
{
return _mlx4_set_path(dev, &qp->alt_ah_attr,
- mlx4_mac_to_u64((u8 *)qp->alt_smac),
- (qp_attr_mask & IB_QP_ALT_VID) ?
- qp->alt_vlan_id : 0xffff,
+ 0,
+ 0xffff,
path, &mqp->alt, port);
}
@@ -1424,7 +1521,8 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
-static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_qp *qp,
struct mlx4_qp_context *context)
{
u64 u64_mac;
@@ -1447,6 +1545,58 @@ static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *
return 0;
}
+static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ struct counter_index *new_counter_index;
+ int err;
+ u32 tmp_idx;
+
+ if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) !=
+ IB_LINK_LAYER_ETHERNET ||
+ !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) ||
+ !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
+ return 0;
+
+ err = mlx4_counter_alloc(dev->dev, &tmp_idx);
+ if (err)
+ return err;
+
+ new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL);
+ if (!new_counter_index) {
+ mlx4_counter_free(dev->dev, tmp_idx);
+ return -ENOMEM;
+ }
+
+ new_counter_index->index = tmp_idx;
+ new_counter_index->allocated = 1;
+ qp->counter_index = new_counter_index;
+
+ mutex_lock(&dev->counters_table[qp->port - 1].mutex);
+ list_add_tail(&new_counter_index->list,
+ &dev->counters_table[qp->port - 1].counters_list);
+ mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
+
+ return 0;
+}
+
+enum {
+ MLX4_QPC_ROCE_MODE_1 = 0,
+ MLX4_QPC_ROCE_MODE_2 = 2,
+ MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
+};
+
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+ switch (gid_type) {
+ case IB_GID_TYPE_ROCE:
+ return MLX4_QPC_ROCE_MODE_1;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ return MLX4_QPC_ROCE_MODE_2;
+ default:
+ return MLX4_QPC_ROCE_MODE_UNDEFINED;
+ }
+}
+
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1460,6 +1610,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int sqd_event;
int steer_qp = 0;
int err = -EINVAL;
+ int counter_index;
/* APM is not supported under RoCE */
if (attr_mask & IB_QP_ALT_PATH &&
@@ -1519,6 +1670,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
context->sq_size_stride |= qp->sq.wqe_shift - 4;
+ if (new_state == IB_QPS_RESET && qp->counter_index)
+ mlx4_ib_free_qp_counter(dev, qp);
+
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
context->xrcd = cpu_to_be32((u32) qp->xrcdn);
@@ -1527,9 +1681,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (qp->ibqp.uobject)
- context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
+ context->usr_page = cpu_to_be32(
+ mlx4_to_hw_uar_index(dev->dev,
+ to_mucontext(ibqp->uobject->context)->uar.index));
else
- context->usr_page = cpu_to_be32(dev->priv_uar.index);
+ context->usr_page = cpu_to_be32(
+ mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
if (attr_mask & IB_QP_DEST_QPN)
context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -1543,10 +1700,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
- if (dev->counters[qp->port - 1].index != -1) {
- context->pri_path.counter_index =
- dev->counters[qp->port - 1].index;
+ err = create_qp_lb_counter(dev, qp);
+ if (err)
+ goto out;
+
+ counter_index =
+ dev->counters_table[qp->port - 1].default_counter;
+ if (qp->counter_index)
+ counter_index = qp->counter_index->index;
+
+ if (counter_index != -1) {
+ context->pri_path.counter_index = counter_index;
optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+ if (qp->counter_index) {
+ context->pri_path.fl |=
+ MLX4_FL_ETH_SRC_CHECK_MC_LB;
+ context->pri_path.vlan_control |=
+ MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+ }
} else
context->pri_path.counter_index =
MLX4_SINK_COUNTER_INDEX(dev->dev);
@@ -1555,6 +1726,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
mlx4_ib_steer_qp_reg(dev, qp, 1);
steer_qp = 1;
}
+
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
+ IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
+
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1565,13 +1744,50 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
+ u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr;
+ u16 vlan = 0xffff;
+ u8 smac[ETH_ALEN];
+ int status = 0;
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ attr->ah_attr.ah_flags & IB_AH_GRH;
+
+ if (is_eth) {
+ int index = attr->ah_attr.grh.sgid_index;
+
+ status = ib_get_cached_gid(ibqp->device, port_num,
+ index, &gid, &gid_attr);
+ if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
+ status = -ENOENT;
+ if (!status && gid_attr.ndev) {
+ vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
+ memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gid_attr.ndev);
+ }
+ }
+ if (status)
+ goto out;
+
if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
- attr_mask & IB_QP_PORT ?
- attr->port_num : qp->port))
+ port_num, vlan, smac))
goto out;
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
+
+ if (is_eth &&
+ (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+
+ if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
+ err = -EINVAL;
+ goto out;
+ }
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
+
}
if (attr_mask & IB_QP_TIMEOUT) {
@@ -1704,7 +1920,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
- err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
+ err = handle_eth_ud_smac_index(dev, qp, context);
if (err) {
err = -EINVAL;
goto out;
@@ -1743,7 +1959,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
sqd_event = 0;
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->rlkey |= (1 << 4);
+ context->rlkey_roce_mode |= (1 << 4);
/*
* Before passing a kernel QP to the HW, make sure that the
@@ -1848,6 +2064,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
out:
+ if (err && qp->counter_index)
+ mlx4_ib_free_qp_counter(dev, qp);
if (err && steer_qp)
mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
@@ -1918,8 +2136,8 @@ out:
return err;
}
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
@@ -2022,6 +2240,27 @@ out:
return err;
}
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ int ret;
+
+ ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
+
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ int err = 0;
+
+ if (sqp->roce_v2_gsi)
+ err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
+ if (err)
+ pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n",
+ err);
+ }
+ return ret;
+}
+
static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
{
int i;
@@ -2036,14 +2275,14 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
}
static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
- struct ib_send_wr *wr,
+ struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
struct ib_device *ib_dev = &mdev->ib_dev;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
- struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ struct mlx4_ib_ah *ah = to_mah(wr->ah);
u16 pkey;
u32 qkey;
int send_size;
@@ -2051,20 +2290,20 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
int spc;
int i;
- if (wr->opcode != IB_WR_SEND)
+ if (wr->wr.opcode != IB_WR_SEND)
return -EINVAL;
send_size = 0;
- for (i = 0; i < wr->num_sge; ++i)
- send_size += wr->sg_list[i].length;
+ for (i = 0; i < wr->wr.num_sge; ++i)
+ send_size += wr->wr.sg_list[i].length;
/* for proxy-qp0 sends, need to add in size of tunnel header */
/* for tunnel-qp0 sends, tunnel header is already in s/g list */
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
- ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+ ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
@@ -2082,11 +2321,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
mlx->rlid = sqp->ud_header.lrh.destination_lid;
sqp->ud_header.lrh.virtual_lane = 0;
- sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
- sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
else
sqp->ud_header.bth.destination_qpn =
cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
@@ -2148,24 +2387,15 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
return 0;
}
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
- int i;
-
- for (i = ETH_ALEN; i; i--) {
- dst_mac[i - 1] = src_mac & 0xff;
- src_mac >>= 8;
- }
-}
-
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
+#define MLX4_ROCEV2_QP1_SPORT 0xC000
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
- struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ struct mlx4_ib_ah *ah = to_mah(wr->ah);
union ib_gid sgid;
u16 pkey;
int send_size;
@@ -2177,14 +2407,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
bool is_eth;
bool is_vlan = false;
bool is_grh;
+ bool is_udp = false;
+ int ip_version = 0;
send_size = 0;
- for (i = 0; i < wr->num_sge; ++i)
- send_size += wr->sg_list[i].length;
+ for (i = 0; i < wr->wr.num_sge; ++i)
+ send_size += wr->wr.sg_list[i].length;
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
+ struct ib_gid_attr gid_attr;
+
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -2197,17 +2431,36 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
} else {
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sgid);
- if (err)
+ ah->av.ib.gid_index, &sgid,
+ &gid_attr);
+ if (!err) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ if (!memcmp(&sgid, &zgid, sizeof(sgid)))
+ err = -ENOENT;
+ }
+ if (!err) {
+ is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ if (is_udp) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
+ ip_version = 4;
+ else
+ ip_version = 6;
+ is_grh = false;
+ }
+ } else {
return err;
+ }
}
-
if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
is_vlan = 1;
}
}
- ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+ err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
+ ip_version, is_udp, 0, &sqp->ud_header);
+ if (err)
+ return err;
if (!is_eth) {
sqp->ud_header.lrh.service_level =
@@ -2216,7 +2469,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
}
- if (is_grh) {
+ if (is_grh || (ip_version == 6)) {
sqp->ud_header.grh.traffic_class =
(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
sqp->ud_header.grh.flow_label =
@@ -2239,12 +2492,31 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index,
- &sqp->ud_header.grh.source_gid);
+ &sqp->ud_header.grh.source_gid, NULL);
}
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
}
+ if (ip_version == 4) {
+ sqp->ud_header.ip4.tos =
+ (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
+ sqp->ud_header.ip4.id = 0;
+ sqp->ud_header.ip4.frag_off = htons(IP_DF);
+ sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
+
+ memcpy(&sqp->ud_header.ip4.saddr,
+ sgid.raw + 12, 4);
+ memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
+ sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
+ }
+
+ if (is_udp) {
+ sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
+ sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
+ sqp->ud_header.udp.csum = 0;
+ }
+
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
@@ -2257,7 +2529,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
mlx->rlid = sqp->ud_header.lrh.destination_lid;
}
- switch (wr->opcode) {
+ switch (wr->wr.opcode) {
case IB_WR_SEND:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
@@ -2265,7 +2537,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->ex.imm_data;
+ sqp->ud_header.immediate_data = wr->wr.ex.imm_data;
break;
default:
return -EINVAL;
@@ -2273,34 +2545,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
if (is_eth) {
struct in6_addr in6;
-
+ u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
+ ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+ (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
+
mlx->sched_prio = cpu_to_be16(pcp);
+ ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
- /* FIXME: cache smac value? */
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
- if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
- u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
- u8 smac[ETH_ALEN];
-
- mlx4_u64_to_smac(smac, mac);
- memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
- } else {
- /* use the src mac of the tunnel */
- memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
- }
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
- sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.eth.type = cpu_to_be16(ether_type);
} else {
- sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
@@ -2308,16 +2573,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
}
- sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
if (!sqp->qp.ibqp.qp_num)
ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
else
- ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
- sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
- sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
- sqp->qkey : wr->wr.ud.remote_qkey);
+ sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
+ sqp->qkey : wr->remote_qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
@@ -2405,45 +2670,22 @@ static __be32 convert_access(int acc)
cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
}
-static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
+ struct ib_reg_wr *wr)
{
- struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
- int i;
+ struct mlx4_ib_mr *mr = to_mmr(wr->mr);
- for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
- mfrpl->mapped_page_list[i] =
- cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
- MLX4_MTT_FLAG_PRESENT);
-
- fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
- fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
- fseg->buf_list = cpu_to_be64(mfrpl->map);
- fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
- fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
+ fseg->flags = convert_access(wr->access);
+ fseg->mem_key = cpu_to_be32(wr->key);
+ fseg->buf_list = cpu_to_be64(mr->page_map);
+ fseg->start_addr = cpu_to_be64(mr->ibmr.iova);
+ fseg->reg_len = cpu_to_be64(mr->ibmr.length);
fseg->offset = 0; /* XXX -- is this just for ZBVA? */
- fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift);
+ fseg->page_size = cpu_to_be32(ilog2(mr->ibmr.page_size));
fseg->reserved[0] = 0;
fseg->reserved[1] = 0;
}
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
-{
- bseg->flags1 =
- convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
- cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
- MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
- MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
- bseg->flags2 = 0;
- if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
- if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
- bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
- bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
- bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
- bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
-}
-
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
{
memset(iseg, 0, sizeof(*iseg));
@@ -2458,46 +2700,47 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
rseg->reserved = 0;
}
-static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
+ struct ib_atomic_wr *wr)
{
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
- } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+ if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->swap);
+ aseg->compare = cpu_to_be64(wr->compare_add);
+ } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+ aseg->swap_add = cpu_to_be64(wr->compare_add);
+ aseg->compare = cpu_to_be64(wr->compare_add_mask);
} else {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->swap_add = cpu_to_be64(wr->compare_add);
aseg->compare = 0;
}
}
static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
- struct ib_send_wr *wr)
+ struct ib_atomic_wr *wr)
{
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+ aseg->swap_add = cpu_to_be64(wr->swap);
+ aseg->swap_add_mask = cpu_to_be64(wr->swap_mask);
+ aseg->compare = cpu_to_be64(wr->compare_add);
+ aseg->compare_mask = cpu_to_be64(wr->compare_add_mask);
}
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr)
+ struct ib_ud_wr *wr)
{
- memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
- dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
- dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
- dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
- memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
+ memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
+ dseg->dqpn = cpu_to_be32(wr->remote_qpn);
+ dseg->qkey = cpu_to_be32(wr->remote_qkey);
+ dseg->vlan = to_mah(wr->ah)->av.eth.vlan;
+ memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6);
}
static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr,
+ struct ib_ud_wr *wr,
enum mlx4_ib_qp_type qpt)
{
- union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
+ union mlx4_ext_av *av = &to_mah(wr->ah)->av;
struct mlx4_av sqp_av = {0};
int port = *((u8 *) &av->ib.port_pd) & 0x3;
@@ -2516,18 +2759,18 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
}
-static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
+static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
{
struct mlx4_wqe_inline_seg *inl = wqe;
struct mlx4_ib_tunnel_header hdr;
- struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ struct mlx4_ib_ah *ah = to_mah(wr->ah);
int spc;
int i;
memcpy(&hdr.av, &ah->av, sizeof hdr.av);
- hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
- hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
- hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ hdr.remote_qpn = cpu_to_be32(wr->remote_qpn);
+ hdr.pkey_index = cpu_to_be16(wr->pkey_index);
+ hdr.qkey = cpu_to_be32(wr->remote_qkey);
memcpy(hdr.mac, ah->av.eth.mac, 6);
hdr.vlan = ah->av.eth.vlan;
@@ -2599,22 +2842,22 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
-static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
__be32 *lso_hdr_sz, __be32 *blh)
{
- unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
+ unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
*blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
- wr->num_sge > qp->sq.max_gs - (halign >> 4)))
+ wr->wr.num_sge > qp->sq.max_gs - (halign >> 4)))
return -EINVAL;
- memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
+ memcpy(wqe->header, wr->header, wr->hlen);
- *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
+ *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen);
*lso_seg_len = halign;
return 0;
}
@@ -2662,6 +2905,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(qp);
+
+ if (sqp->roce_v2_gsi) {
+ struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
+ struct ib_gid_attr gid_attr;
+ union ib_gid gid;
+
+ if (!ib_get_cached_gid(ibqp->device,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &gid,
+ &gid_attr)) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ } else {
+ pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
+ ah->av.ib.gid_index);
+ }
+ }
+ }
+
spin_lock_irqsave(&qp->sq.lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
@@ -2713,11 +2979,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
- set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
- set_atomic_seg(wqe, wr);
+ set_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mlx4_wqe_atomic_seg);
size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2726,11 +2992,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
- set_masked_atomic_seg(wqe, wr);
+ set_masked_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2741,8 +3007,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
break;
@@ -2755,21 +3021,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
ctrl->srcrb_flags |=
cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
- set_fmr_seg(wqe, wr);
- wqe += sizeof (struct mlx4_wqe_fmr_seg);
- size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+ set_reg_seg(wqe, reg_wr(wr));
+ wqe += sizeof(struct mlx4_wqe_fmr_seg);
+ size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
break;
- case IB_WR_BIND_MW:
- ctrl->srcrb_flags |=
- cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
- set_bind_seg(wqe, wr);
- wqe += sizeof(struct mlx4_wqe_bind_seg);
- size += sizeof(struct mlx4_wqe_bind_seg) / 16;
- break;
default:
/* No extra segments required for sends */
break;
@@ -2777,7 +3036,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_TUN_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+ err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
+ ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2788,19 +3048,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case MLX4_IB_QPT_TUN_SMI:
case MLX4_IB_QPT_TUN_GSI:
/* this is a UD qp used in MAD responses to slaves. */
- set_datagram_seg(wqe, wr);
+ set_datagram_seg(wqe, ud_wr(wr));
/* set the forced-loopback bit in the data seg av */
*(__be32 *) wqe |= cpu_to_be32(0x80000000);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
break;
case MLX4_IB_QPT_UD:
- set_datagram_seg(wqe, wr);
+ set_datagram_seg(wqe, ud_wr(wr));
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
- err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
+ err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
+ &lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2812,7 +3073,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+ err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
+ ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2823,7 +3085,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
add_zero_len_inline(wqe);
wqe += 16;
size++;
- build_tunnel_header(wr, wqe, &seglen);
+ build_tunnel_header(ud_wr(wr), wqe, &seglen);
wqe += seglen;
size += seglen / 16;
break;
@@ -2833,18 +3095,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
* In this case we first add a UD segment targeting
* the tunnel qp, and then add a header with address
* information */
- set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe,
+ ud_wr(wr),
qp->mlx4_ib_qp_type);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
- build_tunnel_header(wr, wqe, &seglen);
+ build_tunnel_header(ud_wr(wr), wqe, &seglen);
wqe += seglen;
size += seglen / 16;
break;
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
+ err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index dce5dfe..0597f3e 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -34,6 +34,7 @@
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include "mlx4_ib.h"
#include "user.h"
@@ -170,10 +171,15 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
- srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+ srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+ GFP_KERNEL | __GFP_NOWARN);
if (!srq->wrid) {
- err = -ENOMEM;
- goto err_mtt;
+ srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
+ GFP_KERNEL, PAGE_KERNEL);
+ if (!srq->wrid) {
+ err = -ENOMEM;
+ goto err_mtt;
+ }
}
}
@@ -204,7 +210,7 @@ err_wrid:
if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
else
- kfree(srq->wrid);
+ kvfree(srq->wrid);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
@@ -281,7 +287,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq)
mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
ib_umem_release(msrq->umem);
} else {
- kfree(msrq->wrid);
+ kvfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_db_free(dev->dev, &msrq->db);
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 6608058..745efa4 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,8 +32,10 @@
#include "mlx5_ib.h"
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah)
+static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ah *ah,
+ struct ib_ah_attr *ah_attr,
+ enum rdma_link_layer ll)
{
if (ah_attr->ah_flags & IB_AH_GRH) {
memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
@@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
ah->av.tclass = ah_attr->grh.traffic_class;
}
- ah->av.rlid = cpu_to_be16(ah_attr->dlid);
- ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
- ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+ ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ ah->av.udp_sport =
+ mlx5_get_roce_udp_sport(dev,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index);
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+ } else {
+ ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+ ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+ }
return &ah->ibah;
}
@@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct mlx5_ib_ah *ah;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ enum rdma_link_layer ll;
+
+ ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+
+ if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+ return ERR_PTR(-EINVAL);
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
- return create_ib_ah(ah_attr, ah); /* never fails */
+ return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 2d0dbbf..fd1de31 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -109,8 +109,8 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
case IB_WR_LOCAL_INV:
return IB_WC_LOCAL_INV;
- case IB_WR_FAST_REG_MR:
- return IB_WC_FAST_REG_MR;
+ case IB_WR_REG_MR:
+ return IB_WC_REG_MR;
default:
pr_warn("unknown completion status\n");
@@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX5_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX5_OPCODE_UMR:
wc->opcode = get_umr_comp(wq, idx);
break;
@@ -171,6 +168,7 @@ enum {
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_ib_qp *qp)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
struct mlx5_ib_srq *srq;
struct mlx5_ib_wq *wq;
@@ -236,6 +234,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
} else {
wc->pkey_index = 0;
}
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ switch (wc->sl & 0x3) {
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
+ wc->network_hdr_type = RDMA_NETWORK_IB;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ break;
+ }
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
@@ -756,16 +770,16 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int uninitialized_var(index);
int uninitialized_var(inlen);
int cqe_size;
- int irqn;
+ unsigned int irqn;
int eqn;
int err;
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
+ if (check_cq_create_flags(attr->flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
return ERR_PTR(-EINVAL);
@@ -779,6 +793,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
+ cq->create_flags = attr->flags;
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,
@@ -796,6 +811,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
cq->cqe_size = cqe_size;
cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+ if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ cqb->ctx.cqe_sz_flags |= (1 << 1);
+
cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 68508d5..03c418c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -40,9 +40,14 @@
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
+#include <linux/in.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/fs.h>
#include "user.h"
#include "mlx5_ib.h"
@@ -63,12 +68,14 @@ static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+enum {
+ MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
+};
+
static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device)
+mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
-
- switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
+ switch (port_type_cap) {
case MLX5_CAP_PORT_TYPE_IB:
return IB_LINK_LAYER_INFINIBAND;
case MLX5_CAP_PORT_TYPE_ETH:
@@ -78,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device)
}
}
+static enum rdma_link_layer
+mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
+
+ return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+}
+
+static int mlx5_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
+ roce.nb);
+
+ if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
+ return NOTIFY_DONE;
+
+ write_lock(&ibdev->roce.netdev_lock);
+ if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+ ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
+ write_unlock(&ibdev->roce.netdev_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
+ u8 port_num)
+{
+ struct mlx5_ib_dev *ibdev = to_mdev(device);
+ struct net_device *ndev;
+
+ /* Ensure ndev does not disappear before we invoke dev_hold()
+ */
+ read_lock(&ibdev->roce.netdev_lock);
+ ndev = ibdev->roce.netdev;
+ if (ndev)
+ dev_hold(ndev);
+ read_unlock(&ibdev->roce.netdev_lock);
+
+ return ndev;
+}
+
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *props)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct net_device *ndev;
+ enum ib_mtu ndev_ib_mtu;
+ u16 qkey_viol_cntr;
+
+ memset(props, 0, sizeof(*props));
+
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
+
+ props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+ roce_address_table_size);
+ props->max_mtu = IB_MTU_4096;
+ props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
+ props->pkey_tbl_len = 1;
+ props->state = IB_PORT_DOWN;
+ props->phys_state = 3;
+
+ mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+ props->qkey_viol_cntr = qkey_viol_cntr;
+
+ ndev = mlx5_ib_get_netdev(device, port_num);
+ if (!ndev)
+ return 0;
+
+ if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ }
+
+ ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
+
+ dev_put(ndev);
+
+ props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
+
+ props->active_width = IB_WIDTH_4X; /* TODO */
+ props->active_speed = IB_SPEED_QDR; /* TODO */
+
+ return 0;
+}
+
+static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void *mlx5_addr)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_l3_address);
+ void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_mac_47_32);
+
+ if (!gid)
+ return;
+
+ ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+
+ if (is_vlan_dev(attr->ndev)) {
+ MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
+ MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ }
+
+ switch (attr->gid_type) {
+ case IB_GID_TYPE_IB:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ break;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ break;
+
+ default:
+ WARN_ON(true);
+ }
+
+ if (attr->gid_type != IB_GID_TYPE_IB) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV4);
+ else
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV6);
+ }
+
+ if ((attr->gid_type == IB_GID_TYPE_IB) ||
+ !ipv6_addr_v4mapped((void *)gid))
+ memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
+ else
+ memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
+}
+
+static int set_roce_addr(struct ib_device *device, u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ memset(in, 0, sizeof(in));
+
+ ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, gid, attr);
+}
+
+static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, NULL, NULL);
+}
+
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index)
+{
+ struct ib_gid_attr attr;
+ union ib_gid gid;
+
+ if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
+ return 0;
+
+ if (!attr.ndev)
+ return 0;
+
+ dev_put(attr.ndev);
+
+ if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return 0;
+
+ return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
return !dev->mdev->issi;
@@ -94,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
if (mlx5_use_mad_ifc(to_mdev(ibdev)))
return MLX5_VPORT_ACCESS_METHOD_MAD;
- if (mlx5_ib_port_link_layer(ibdev) ==
+ if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET)
return MLX5_VPORT_ACCESS_METHOD_NIC;
return MLX5_VPORT_ACCESS_METHOD_HCA;
}
+static void get_atomic_caps(struct mlx5_ib_dev *dev,
+ struct ib_device_attr *props)
+{
+ u8 tmp;
+ u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+ u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+ u8 atomic_req_8B_endianness_mode =
+ MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+
+ /* Check if HW supports 8 bytes standard atomic operations and capable
+ * of host endianness respond
+ */
+ tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
+ if (((atomic_operations & tmp) == tmp) &&
+ (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
+ (atomic_req_8B_endianness_mode)) {
+ props->atomic_cap = IB_ATOMIC_HCA;
+ } else {
+ props->atomic_cap = IB_ATOMIC_NONE;
+ }
+}
+
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
@@ -116,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
- if (!err)
- *sys_image_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *sys_image_guid = cpu_to_be64(tmp);
+
+ return err;
+
}
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
@@ -176,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
- if (!err)
- *node_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *node_guid = cpu_to_be64(tmp);
+
+ return err;
}
struct mlx5_reg_node_desc {
@@ -260,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
@@ -275,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
- props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
+ props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
@@ -286,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
- props->atomic_cap = IB_ATOMIC_NONE;
+ get_atomic_caps(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+ props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (MLX5_CAP_GEN(mdev, pg))
@@ -300,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->odp_caps = dev->odp_caps;
#endif
+ if (MLX5_CAP_GEN(mdev, cd))
+ props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
return 0;
}
@@ -480,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_hca_port(ibdev, port, props);
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ return mlx5_query_port_roce(ibdev, port, props);
+
default:
return -EINVAL;
}
@@ -580,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_alloc_ucontext_req_v2 req;
- struct mlx5_ib_alloc_ucontext_resp resp;
+ struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+ struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
@@ -592,24 +844,28 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
int err;
int i;
size_t reqlen;
+ size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
+ max_cqe_version);
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- memset(&req, 0, sizeof(req));
+ if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
+ return ERR_PTR(-EINVAL);
+
reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
- else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+ else if (reqlen >= min_req_v2)
ver = 2;
else
return ERR_PTR(-EINVAL);
- err = ib_copy_from_udata(&req, udata, reqlen);
+ err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
if (err)
return ERR_PTR(err);
- if (req.flags || req.reserved)
+ if (req.flags)
return ERR_PTR(-EINVAL);
if (req.total_num_uuars > MLX5_MAX_UUARS)
@@ -618,6 +874,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
+ if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (reqlen > sizeof(req) &&
+ !ib_is_udata_cleared(udata, sizeof(req),
+ reqlen - sizeof(req)))
+ return ERR_PTR(-EOPNOTSUPP);
+
req.total_num_uuars = ALIGN(req.total_num_uuars,
MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
@@ -633,6 +897,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp.cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
+ resp.response_length = min(offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length), udata->outlen);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
@@ -678,22 +947,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
+ err = mlx5_core_alloc_transport_domain(dev->mdev,
+ &context->tdn);
+ if (err)
+ goto out_uars;
+ }
+
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
- err = ib_copy_to_udata(udata, &resp,
- sizeof(resp) - sizeof(resp.reserved));
+
+ if (field_avail(typeof(resp), cqe_version, udata->outlen))
+ resp.response_length += sizeof(resp.cqe_version);
+
+ if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp.hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg, internal_timer_h) %
+ PAGE_SIZE;
+ resp.response_length += sizeof(resp.hca_core_clock_offset) +
+ sizeof(resp.reserved2) +
+ sizeof(resp.reserved3);
+ }
+
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- goto out_uars;
+ goto out_td;
uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
+ context->cqe_version = resp.cqe_version;
+
return &context->ibucontext;
+out_td:
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
@@ -718,6 +1014,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_uuar_info *uuari = &context->uuari;
int i;
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
@@ -787,6 +1086,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
+ case MLX5_IB_MMAP_CORE_CLOCK:
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+
+ /* Don't expose to user-space information it shouldn't have */
+ if (PAGE_SIZE > 4096)
+ return -EOPNOTSUPP;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = (dev->mdev->iseg_base +
+ offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+ PAGE_SHIFT;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
+ vma->vm_start,
+ (unsigned long long)pfn << PAGE_SHIFT);
+ break;
+
default:
return -EINVAL;
}
@@ -835,6 +1158,457 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
return 0;
}
+static bool outer_header_zero(u32 *match_criteria)
+{
+ int size = MLX5_ST_SZ_BYTES(fte_match_param);
+ char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers);
+
+ return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
+ outer_headers_c + 1,
+ size - 1);
+}
+
+static int parse_flow_attr(u32 *match_c, u32 *match_v,
+ union ib_flow_spec *ib_spec)
+{
+ void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ if (ib_spec->size != sizeof(ib_spec->eth))
+ return -EINVAL;
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dmac_47_16),
+ ib_spec->eth.mask.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ vlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ vlan_tag, 1);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_vid, ntohs(ib_spec->eth.val.vlan_tag));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_cfi,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_cfi,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 12);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ first_prio,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ first_prio,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 13);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ ethertype, ntohs(ib_spec->eth.mask.ether_type));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ethertype, ntohs(ib_spec->eth.val.ether_type));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ if (ib_spec->size != sizeof(ib_spec->ipv4))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ethertype, ETH_P_IP);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.src_ip,
+ sizeof(ib_spec->ipv4.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.src_ip,
+ sizeof(ib_spec->ipv4.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.dst_ip,
+ sizeof(ib_spec->ipv4.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.dst_ip,
+ sizeof(ib_spec->ipv4.val.dst_ip));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ if (ib_spec->size != sizeof(ib_spec->tcp_udp))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ IPPROTO_TCP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_UDP:
+ if (ib_spec->size != sizeof(ib_spec->tcp_udp))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ IPPROTO_UDP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* If a flow could catch both multicast and unicast packets,
+ * it won't fall into the multicast flow steering table and this rule
+ * could steal other multicast packets.
+ */
+static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
+{
+ struct ib_flow_spec_eth *eth_spec;
+
+ if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
+ ib_attr->size < sizeof(struct ib_flow_attr) +
+ sizeof(struct ib_flow_spec_eth) ||
+ ib_attr->num_of_specs < 1)
+ return false;
+
+ eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
+ if (eth_spec->type != IB_FLOW_SPEC_ETH ||
+ eth_spec->size != sizeof(*eth_spec))
+ return false;
+
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+}
+
+static bool is_valid_attr(struct ib_flow_attr *flow_attr)
+{
+ union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ bool has_ipv4_spec = false;
+ bool eth_type_ipv4 = true;
+ unsigned int spec_index;
+
+ /* Validate that ethertype is correct */
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ if (ib_spec->type == IB_FLOW_SPEC_ETH &&
+ ib_spec->eth.mask.ether_type) {
+ if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
+ ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
+ eth_type_ipv4 = false;
+ } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
+ has_ipv4_spec = true;
+ }
+ ib_spec = (void *)ib_spec + ib_spec->size;
+ }
+ return !has_ipv4_spec || eth_type_ipv4;
+}
+
+static void put_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *prio, bool ft_added)
+{
+ prio->refcount -= !!ft_added;
+ if (!prio->refcount) {
+ mlx5_destroy_flow_table(prio->flow_table);
+ prio->flow_table = NULL;
+ }
+}
+
+static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
+ struct mlx5_ib_flow_handler *handler = container_of(flow_id,
+ struct mlx5_ib_flow_handler,
+ ibflow);
+ struct mlx5_ib_flow_handler *iter, *tmp;
+
+ mutex_lock(&dev->flow_db.lock);
+
+ list_for_each_entry_safe(iter, tmp, &handler->list, list) {
+ mlx5_del_flow_rule(iter->rule);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+
+ mlx5_del_flow_rule(handler->rule);
+ put_flow_table(dev, &dev->flow_db.prios[handler->prio], true);
+ mutex_unlock(&dev->flow_db.lock);
+
+ kfree(handler);
+
+ return 0;
+}
+
+#define MLX5_FS_MAX_TYPES 10
+#define MLX5_FS_MAX_ENTRIES 32000UL
+static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
+ struct ib_flow_attr *flow_attr)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_table *ft;
+ int num_entries;
+ int num_groups;
+ int priority;
+ int err = 0;
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ if (flow_is_multicast_only(flow_attr))
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = flow_attr->priority;
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_BYPASS);
+ num_entries = MLX5_FS_MAX_ENTRIES;
+ num_groups = MLX5_FS_MAX_TYPES;
+ prio = &dev->flow_db.prios[priority];
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS);
+ build_leftovers_ft_param(&priority,
+ &num_entries,
+ &num_groups);
+ prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ }
+
+ if (!ns)
+ return ERR_PTR(-ENOTSUPP);
+
+ ft = prio->flow_table;
+ if (!ft) {
+ ft = mlx5_create_auto_grouped_flow_table(ns, priority,
+ num_entries,
+ num_groups);
+
+ if (!IS_ERR(ft)) {
+ prio->refcount = 0;
+ prio->flow_table = ft;
+ } else {
+ err = PTR_ERR(ft);
+ }
+ }
+
+ return err ? ERR_PTR(err) : prio;
+}
+
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ struct mlx5_ib_flow_handler *handler;
+ void *ib_flow = flow_attr + 1;
+ u8 match_criteria_enable = 0;
+ unsigned int spec_index;
+ u32 *match_c;
+ u32 *match_v;
+ int err = 0;
+
+ if (!is_valid_attr(flow_attr))
+ return ERR_PTR(-EINVAL);
+
+ match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !match_c || !match_v) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ err = parse_flow_attr(match_c, match_v, ib_flow);
+ if (err < 0)
+ goto free;
+
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ }
+
+ /* Outer header support only */
+ match_criteria_enable = (!outer_header_zero(match_c)) << 0;
+ handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
+ match_c, match_v,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_DEFAULT_FLOW_TAG,
+ dst);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ handler->prio = ft_prio - dev->flow_db.prios;
+
+ ft_prio->flow_table = ft;
+free:
+ if (err)
+ kfree(handler);
+ kfree(match_c);
+ kfree(match_v);
+ return err ? ERR_PTR(err) : handler;
+}
+
+enum {
+ LEFTOVERS_MC,
+ LEFTOVERS_UC,
+};
+
+static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_ucast = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ static struct {
+ struct ib_flow_attr flow_attr;
+ struct ib_flow_spec_eth eth_flow;
+ } leftovers_specs[] = {
+ [LEFTOVERS_MC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {0x1} }
+ }
+ },
+ [LEFTOVERS_UC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {} }
+ }
+ }
+ };
+
+ handler = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_MC].flow_attr,
+ dst);
+ if (!IS_ERR(handler) &&
+ flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
+ handler_ucast = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_UC].flow_attr,
+ dst);
+ if (IS_ERR(handler_ucast)) {
+ kfree(handler);
+ handler = handler_ucast;
+ } else {
+ list_add(&handler_ucast->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
+
+static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_flow_handler *handler = NULL;
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_ib_flow_prio *ft_prio;
+ int err;
+
+ if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
+ return ERR_PTR(-ENOSPC);
+
+ if (domain != IB_FLOW_DOMAIN_USER ||
+ flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
+ flow_attr->flags)
+ return ERR_PTR(-EINVAL);
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&dev->flow_db.lock);
+
+ ft_prio = get_flow_table(dev, flow_attr);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ handler = create_flow_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else {
+ err = -EINVAL;
+ goto destroy_ft;
+ }
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ handler = NULL;
+ goto destroy_ft;
+ }
+
+ ft_prio->refcount++;
+ mutex_unlock(&dev->flow_db.lock);
+ kfree(dst);
+
+ return &handler->ibflow;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+unlock:
+ mutex_unlock(&dev->flow_db.lock);
+ kfree(dst);
+ kfree(handler);
+ return ERR_PTR(err);
+}
+
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -1304,6 +2078,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
mlx5_ib_dealloc_pd(devr->p0);
}
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
+ u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
+ u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+ u32 ret = 0;
+
+ if (ll == IB_LINK_LAYER_INFINIBAND)
+ return RDMA_CORE_PORT_IBA_IB;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
+ return 0;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+ return 0;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ return ret;
+}
+
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
@@ -1316,20 +2116,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
+static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+{
+ int err;
+
+ dev->roce.nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier(&dev->roce.nb);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ goto err_unregister_netdevice_notifier;
+
+ return 0;
+
+err_unregister_netdevice_notifier:
+ unregister_netdevice_notifier(&dev->roce.nb);
+ return err;
+}
+
+static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+{
+ mlx5_nic_vport_disable_roce(dev->mdev);
+ unregister_netdevice_notifier(&dev->roce.nb);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
int err;
int i;
- /* don't create IB instance over Eth ports, no RoCE yet! */
- if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
return NULL;
printk_once(KERN_INFO "%s", mlx5_version);
@@ -1340,6 +2170,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->mdev = mdev;
+ rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
goto err_dealloc;
@@ -1385,11 +2216,18 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
dev->ib_dev.uverbs_ex_cmd_mask =
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
+ (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
+ dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
+ dev->ib_dev.add_gid = mlx5_ib_add_gid;
+ dev->ib_dev.del_gid = mlx5_ib_del_gid;
dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
dev->ib_dev.modify_device = mlx5_ib_modify_device;
dev->ib_dev.modify_port = mlx5_ib_modify_port;
@@ -1425,8 +2263,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
- dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
- dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
+ dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
@@ -1440,15 +2277,30 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
+ IB_LINK_LAYER_ETHERNET) {
+ dev->ib_dev.create_flow = mlx5_ib_create_flow;
+ dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ }
err = init_node_data(dev);
if (err)
goto err_dealloc;
+ mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ err = mlx5_enable_roce(dev);
+ if (err)
+ goto err_dealloc;
+ }
+
err = create_dev_resources(&dev->devr);
if (err)
- goto err_dealloc;
+ goto err_disable_roce;
err = mlx5_ib_odp_init_one(dev);
if (err)
@@ -1485,6 +2337,10 @@ err_odp:
err_rsrc:
destroy_dev_resources(&dev->devr);
+err_disable_roce:
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
+
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
@@ -1494,11 +2350,14 @@ err_dealloc:
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
ib_unregister_device(&dev->ib_dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 22123b7..d2b9737 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -42,6 +42,7 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
#include <linux/types.h>
+#include <linux/mlx5/transobj.h>
#define mlx5_ib_dbg(dev, format, arg...) \
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -55,6 +56,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
__LINE__, current->pid, ##arg)
+#define field_avail(type, fld, sz) (offsetof(type, fld) + \
+ sizeof(((type *)0)->fld) <= (sz))
+#define MLX5_IB_DEFAULT_UIDX 0xffffff
+#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+
enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
MLX5_IB_MMAP_CMD_MASK = 0xff,
@@ -62,7 +68,9 @@ enum {
enum mlx5_ib_mmap_cmd {
MLX5_IB_MMAP_REGULAR_PAGE = 0,
- MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
+ MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+ MLX5_IB_MMAP_CORE_CLOCK = 5,
};
enum {
@@ -85,6 +93,15 @@ enum mlx5_ib_mad_ifc_flags {
MLX5_MAD_IFC_NET_VIEW = 4,
};
+enum {
+ MLX5_CROSS_CHANNEL_UUAR = 0,
+};
+
+enum {
+ MLX5_CQE_VERSION_V0,
+ MLX5_CQE_VERSION_V1,
+};
+
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@@ -93,6 +110,9 @@ struct mlx5_ib_ucontext {
*/
struct mutex db_page_mutex;
struct mlx5_uuar_info uuari;
+ u8 cqe_version;
+ /* Transport Domain number */
+ u32 tdn;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -105,6 +125,36 @@ struct mlx5_ib_pd {
u32 pdn;
};
+#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
+#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1)
+#if (MLX5_IB_FLOW_LAST_PRIO <= 0)
+#error "Invalid number of bypass priorities"
+#endif
+#define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1)
+
+#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
+struct mlx5_ib_flow_prio {
+ struct mlx5_flow_table *flow_table;
+ unsigned int refcount;
+};
+
+struct mlx5_ib_flow_handler {
+ struct list_head list;
+ struct ib_flow ibflow;
+ unsigned int prio;
+ struct mlx5_flow_rule *rule;
+};
+
+struct mlx5_ib_flow_db {
+ struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
+ /* Protect flow steering bypass flow tables
+ * when add/del flow rules.
+ * only single add/removal of flow steering rule could be done
+ * simultaneously.
+ */
+ struct mutex lock;
+};
+
/* Use macros here so that don't have to duplicate
* enum ib_send_flags and enum ib_qp_type for low-level driver
*/
@@ -171,35 +221,70 @@ struct mlx5_ib_pfault {
struct mlx5_pagefault mpfault;
};
+struct mlx5_ib_ubuffer {
+ struct ib_umem *umem;
+ int buf_size;
+ u64 buf_addr;
+};
+
+struct mlx5_ib_qp_base {
+ struct mlx5_ib_qp *container_mibqp;
+ struct mlx5_core_qp mqp;
+ struct mlx5_ib_ubuffer ubuffer;
+};
+
+struct mlx5_ib_qp_trans {
+ struct mlx5_ib_qp_base base;
+ u16 xrcdn;
+ u8 alt_port;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+};
+
+struct mlx5_ib_rq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *rq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
+ u32 tirn;
+ u8 state;
+};
+
+struct mlx5_ib_sq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *sq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
+ u32 tisn;
+ u8 state;
+};
+
+struct mlx5_ib_raw_packet_qp {
+ struct mlx5_ib_sq sq;
+ struct mlx5_ib_rq rq;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
- struct mlx5_core_qp mqp;
+ union {
+ struct mlx5_ib_qp_trans trans_qp;
+ struct mlx5_ib_raw_packet_qp raw_packet_qp;
+ };
struct mlx5_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
- u32 doorbell_qpn;
u8 sq_signal_bits;
u8 fm_cache;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
struct mlx5_ib_wq sq;
- struct ib_umem *umem;
- int buf_size;
-
/* serialize qp state modifications
*/
struct mutex mutex;
- u16 xrcdn;
u32 flags;
u8 port;
- u8 alt_port;
- u8 atomic_rd_en;
- u8 resp_depth;
u8 state;
- int mlx_type;
int wq_sig;
int scat_cqe;
int max_inline_data;
@@ -242,9 +327,13 @@ struct mlx5_ib_cq_buf {
enum mlx5_ib_qp_flags {
MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
+ MLX5_IB_QP_CROSS_CHANNEL = 1 << 2,
+ MLX5_IB_QP_MANAGED_SEND = 1 << 3,
+ MLX5_IB_QP_MANAGED_RECV = 1 << 4,
};
struct mlx5_umr_wr {
+ struct ib_send_wr wr;
union {
u64 virt_addr;
u64 offset;
@@ -257,6 +346,11 @@ struct mlx5_umr_wr {
u32 mkey;
};
+static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr)
+{
+ return container_of(wr, struct mlx5_umr_wr, wr);
+}
+
struct mlx5_shared_mr_info {
int mr_id;
struct ib_umem *umem;
@@ -278,6 +372,7 @@ struct mlx5_ib_cq {
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
+ u32 create_flags;
};
struct mlx5_ib_srq {
@@ -313,6 +408,11 @@ enum mlx5_ib_mtt_access_flags {
struct mlx5_ib_mr {
struct ib_mr ibmr;
+ void *descs;
+ dma_addr_t desc_map;
+ int ndescs;
+ int max_descs;
+ int desc_size;
struct mlx5_core_mr mmr;
struct ib_umem *umem;
struct mlx5_shared_mr_info *smr_info;
@@ -324,12 +424,7 @@ struct mlx5_ib_mr {
struct mlx5_create_mkey_mbox_out out;
struct mlx5_core_sig_ctx *sig;
int live;
-};
-
-struct mlx5_ib_fast_reg_page_list {
- struct ib_fast_reg_page_list ibfrpl;
- __be64 *mapped_page_list;
- dma_addr_t map;
+ void *descs_alloc;
};
struct mlx5_ib_umr_context {
@@ -358,20 +453,6 @@ enum {
MLX5_FMR_BUSY,
};
-struct mlx5_ib_fmr {
- struct ib_fmr ibfmr;
- struct mlx5_core_mr mr;
- int access_flags;
- int state;
- /* protect fmr state
- */
- spinlock_t lock;
- u64 wrid;
- struct ib_send_wr wr[2];
- u8 page_shift;
- struct ib_fast_reg_page_list page_list;
-};
-
struct mlx5_cache_ent {
struct list_head head;
/* sync access to the cahce entry
@@ -415,9 +496,19 @@ struct mlx5_ib_resources {
struct ib_srq *s1;
};
+struct mlx5_roce {
+ /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
+ * netdev pointer
+ */
+ rwlock_t netdev_lock;
+ struct net_device *netdev;
+ struct notifier_block nb;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
+ struct mlx5_roce roce;
MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
int num_ports;
/* serialize update of capability mask
@@ -439,6 +530,7 @@ struct mlx5_ib_dev {
*/
struct srcu_struct mr_srcu;
#endif
+ struct mlx5_ib_flow_db flow_db;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -456,11 +548,6 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
}
-static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
-}
-
static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mlx5_ib_cq, ibcq);
@@ -468,7 +555,7 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
{
- return container_of(mqp, struct mlx5_ib_qp, mqp);
+ return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
}
static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
@@ -501,11 +588,6 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
return container_of(ibmr, struct mlx5_ib_mr, ibmr);
}
-static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
-{
- return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
-}
-
struct mlx5_ib_ah {
struct ib_ah ibah;
struct mlx5_av av;
@@ -525,8 +607,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah);
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
@@ -553,7 +633,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length);
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base);
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -573,15 +654,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
- int page_list_len);
-void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
- struct ib_fmr_attr *fmr_attr);
-int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int npages, u64 iova);
-int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
-int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
@@ -661,6 +736,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index);
+
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -686,4 +764,28 @@ static inline int is_qp1(enum ib_qp_type qp_type)
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+static inline u32 check_cq_create_flags(u32 flags)
+{
+ /*
+ * It returns non-zero value for unsupported CQ
+ * create flags, otherwise it returns zero.
+ */
+ return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+}
+
+static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
+ u32 *user_index)
+{
+ if (cqe_version) {
+ if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
+ (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
+ return -EINVAL;
+ *user_index = cmd_uidx;
+ } else {
+ *user_index = MLX5_IB_DEFAULT_UIDX;
+ }
+
+ return 0;
+}
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 54a15b5..6000f7a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -381,7 +381,19 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
}
}
} else if (ent->cur > 2 * ent->limit) {
- if (!someone_adding(cache) &&
+ /*
+ * The remove_keys() logic is performed as garbage collection
+ * task. Such task is intended to be run when no other active
+ * processes are running.
+ *
+ * The need_resched() will return TRUE if there are user tasks
+ * to be activated in near future.
+ *
+ * In such case, we don't execute remove_keys() and postpone
+ * the garbage collection work to try to run in next cycle,
+ * in order to free CPU resources to other tasks.
+ */
+ if (!need_resched() && !someone_adding(cache) &&
time_after(jiffies, cache->last_add + 300 * HZ)) {
remove_keys(dev, i, 1);
if (ent->cur > ent->limit)
@@ -687,7 +699,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
sg->addr = dma;
sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -715,7 +727,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
struct ib_send_wr *wr, u32 key)
{
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
wr->opcode = MLX5_IB_WR_UMR;
@@ -752,7 +764,8 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
struct device *ddev = dev->ib_dev.dma_device;
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr wr, *bad;
+ struct mlx5_umr_wr umrwr;
+ struct ib_send_wr *bad;
struct mlx5_ib_mr *mr;
struct ib_sge sg;
int size;
@@ -798,14 +811,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
goto free_pas;
}
- memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)&umr_context;
- prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
- virt_addr, len, access_flags);
+ memset(&umrwr, 0, sizeof(umrwr));
+ umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
+ prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
+ page_shift, virt_addr, len, access_flags);
mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr, &bad);
+ err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
if (err) {
mlx5_ib_warn(dev, "post send failed, err %d\n", err);
goto unmap_dma;
@@ -851,8 +864,8 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
int size;
__be64 *pas;
dma_addr_t dma;
- struct ib_send_wr wr, *bad;
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+ struct ib_send_wr *bad;
+ struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
@@ -917,26 +930,26 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)&umr_context;
+ wr.wr.wr_id = (u64)(unsigned long)&umr_context;
sg.addr = dma;
sg.length = ALIGN(npages * sizeof(u64),
MLX5_UMR_MTT_ALIGNMENT);
sg.lkey = dev->umrc.pd->local_dma_lkey;
- wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+ wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
MLX5_IB_SEND_UMR_UPDATE_MTT;
- wr.sg_list = &sg;
- wr.num_sge = 1;
- wr.opcode = MLX5_IB_WR_UMR;
- umrwr->npages = sg.length / sizeof(u64);
- umrwr->page_shift = PAGE_SHIFT;
- umrwr->mkey = mr->mmr.key;
- umrwr->target.offset = start_page_index;
+ wr.wr.sg_list = &sg;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = MLX5_IB_WR_UMR;
+ wr.npages = sg.length / sizeof(u64);
+ wr.page_shift = PAGE_SHIFT;
+ wr.mkey = mr->mmr.key;
+ wr.target.offset = start_page_index;
mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr, &bad);
+ err = ib_post_send(umrc->qp, &wr.wr, &bad);
if (err) {
mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
} else {
@@ -1122,16 +1135,17 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr wr, *bad;
+ struct mlx5_umr_wr umrwr;
+ struct ib_send_wr *bad;
int err;
- memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)&umr_context;
- prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+ memset(&umrwr.wr, 0, sizeof(umrwr));
+ umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
+ prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr, &bad);
+ err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
if (err) {
up(&umrc->sem);
mlx5_ib_dbg(dev, "err %d\n", err);
@@ -1151,6 +1165,52 @@ error:
return err;
}
+static int
+mlx5_alloc_priv_descs(struct ib_device *device,
+ struct mlx5_ib_mr *mr,
+ int ndescs,
+ int desc_size)
+{
+ int size = ndescs * desc_size;
+ int add_size;
+ int ret;
+
+ add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+
+ mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
+ if (!mr->descs_alloc)
+ return -ENOMEM;
+
+ mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
+
+ mr->desc_map = dma_map_single(device->dma_device, mr->descs,
+ size, DMA_TO_DEVICE);
+ if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ return 0;
+err:
+ kfree(mr->descs_alloc);
+
+ return ret;
+}
+
+static void
+mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
+{
+ if (mr->descs) {
+ struct ib_device *device = mr->ibmr.device;
+ int size = mr->max_descs * mr->desc_size;
+
+ dma_unmap_single(device->dma_device, mr->desc_map,
+ size, DMA_TO_DEVICE);
+ kfree(mr->descs_alloc);
+ mr->descs = NULL;
+ }
+}
+
static int clean_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
@@ -1170,6 +1230,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
mr->sig = NULL;
}
+ mlx5_free_priv_descs(mr);
+
if (!umred) {
err = destroy_mkey(dev, mr);
if (err) {
@@ -1259,6 +1321,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (mr_type == IB_MR_TYPE_MEM_REG) {
access_mode = MLX5_ACCESS_MODE_MTT;
in->seg.log2_page_size = PAGE_SHIFT;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr,
+ ndescs, sizeof(u64));
+ if (err)
+ goto err_free_in;
+
+ mr->desc_size = sizeof(u64);
+ mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
u32 psv_index[2];
@@ -1315,6 +1385,7 @@ err_destroy_psv:
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
}
+ mlx5_free_priv_descs(mr);
err_free_sig:
kfree(mr->sig);
err_free_in:
@@ -1324,48 +1395,6 @@ err_free:
return ERR_PTR(err);
}
-struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
- int page_list_len)
-{
- struct mlx5_ib_fast_reg_page_list *mfrpl;
- int size = page_list_len * sizeof(u64);
-
- mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
- if (!mfrpl)
- return ERR_PTR(-ENOMEM);
-
- mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
- if (!mfrpl->ibfrpl.page_list)
- goto err_free;
-
- mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
- size, &mfrpl->map,
- GFP_KERNEL);
- if (!mfrpl->mapped_page_list)
- goto err_free;
-
- WARN_ON(mfrpl->map & 0x3f);
-
- return &mfrpl->ibfrpl;
-
-err_free:
- kfree(mfrpl->ibfrpl.page_list);
- kfree(mfrpl);
- return ERR_PTR(-ENOMEM);
-}
-
-void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
- struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
- struct mlx5_ib_dev *dev = to_mdev(page_list->device);
- int size = page_list->max_page_list_len * sizeof(u64);
-
- dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
- mfrpl->map);
- kfree(mfrpl->ibfrpl.page_list);
- kfree(mfrpl);
-}
-
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status)
{
@@ -1406,3 +1435,39 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
done:
return ret;
}
+
+static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ __be64 *descs;
+
+ if (unlikely(mr->ndescs == mr->max_descs))
+ return -ENOMEM;
+
+ descs = mr->descs;
+ descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+ return 0;
+}
+
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ int n;
+
+ mr->ndescs = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
+ mr->desc_size * mr->max_descs,
+ DMA_TO_DEVICE);
+
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
+
+ ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
+ mr->desc_size * mr->max_descs,
+ DMA_TO_DEVICE);
+
+ return n;
+}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index aa8391e..b8d7636 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -153,14 +153,16 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
struct mlx5_ib_pfault *pfault,
- int error) {
+ int error)
+{
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int ret = mlx5_core_page_fault_resume(dev->mdev,
+ qpn,
pfault->mpfault.flags,
error);
if (ret)
- pr_err("Failed to resolve the page fault on QP 0x%x\n",
- qp->mqp.qpn);
+ pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
}
/*
@@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
#if defined(DEBUG)
u32 ctrl_wqe_index, ctrl_qpn;
#endif
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
@@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
if (ds == 0) {
mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
- wqe_index, qp->mqp.qpn);
+ wqe_index, qpn);
return -EFAULT;
}
@@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
if (wqe_index != ctrl_wqe_index) {
mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_wqe_index);
return -EFAULT;
}
ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
MLX5_WQE_CTRL_QPN_SHIFT;
- if (qp->mqp.qpn != ctrl_qpn) {
+ if (qpn != ctrl_qpn) {
mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_qpn);
return -EFAULT;
}
@@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
int resume_with_error = 0;
u16 wqe_index = pfault->mpfault.wqe.wqe_index;
int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
@@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
}
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
- PAGE_SIZE);
+ PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
- -ret, wqe_index, qp->mqp.qpn);
+ -ret, wqe_index, qpn);
resume_with_error = 1;
goto resolve_page_fault;
}
@@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
resolve_page_fault:
mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
- qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+ qpn, resume_with_error,
+ pfault->mpfault.flags);
free_page((unsigned long)buffer);
}
@@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
qp->disable_page_faults = 1;
spin_lock_init(&qp->disable_page_faults_lock);
- qp->mqp.pfault_handler = mlx5_ib_pfault_handler;
+ qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6f521a3..34cb8e8 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -32,6 +32,8 @@
#include <linux/module.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -64,7 +66,7 @@ static const u32 mlx5_ib_opcode[] = {
[IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
[IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
[IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
- [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR,
+ [IB_WR_REG_MR] = MLX5_OPCODE_UMR,
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
[MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
@@ -114,14 +116,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
* Return: the number of bytes copied, or an error code.
*/
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length)
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base)
{
struct ib_device *ibdev = qp->ibqp.device;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
size_t offset;
size_t wq_end;
- struct ib_umem *umem = qp->umem;
+ struct ib_umem *umem = base->ubuffer.umem;
u32 first_copy_length;
int wqe_length;
int ret;
@@ -172,8 +175,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
struct ib_event event;
- if (type == MLX5_EVENT_TYPE_PATH_MIG)
- to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+ if (type == MLX5_EVENT_TYPE_PATH_MIG) {
+ /* This event is only valid for trans_qps */
+ to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
+ }
if (ibqp->event_handler) {
event.device = ibqp->device;
@@ -265,8 +270,10 @@ static int sq_overhead(enum ib_qp_type qp_type)
/* fall through */
case IB_QPT_RC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_atomic_seg) +
- sizeof(struct mlx5_wqe_raddr_seg);
+ max(sizeof(struct mlx5_wqe_atomic_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg),
+ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg));
break;
case IB_QPT_XRC_TGT:
@@ -274,9 +281,9 @@ static int sq_overhead(enum ib_qp_type qp_type)
case IB_QPT_UC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_umr_ctrl_seg) +
- sizeof(struct mlx5_mkey_seg);
+ max(sizeof(struct mlx5_wqe_raddr_seg),
+ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg));
break;
case IB_QPT_UD:
@@ -366,7 +373,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
static int set_user_buf_size(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
- struct mlx5_ib_create_qp *ucmd)
+ struct mlx5_ib_create_qp *ucmd,
+ struct mlx5_ib_qp_base *base,
+ struct ib_qp_init_attr *attr)
{
int desc_sz = 1 << qp->sq.wqe_shift;
@@ -391,8 +400,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
- (qp->sq.wqe_cnt << 6);
+ if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+ qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
+ } else {
+ base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << 6);
+ }
return 0;
}
@@ -578,8 +592,8 @@ static int to_mlx5_st(enum ib_qp_type type)
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case IB_QPT_GSI: return MLX5_QP_ST_QP1;
case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
- case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_RAW_PACKET:
+ case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_MAX:
default: return -EINVAL;
}
@@ -590,13 +604,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
}
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
+ struct ib_pd *pd,
+ unsigned long addr, size_t size,
+ struct ib_umem **umem,
+ int *npages, int *page_shift, int *ncont,
+ u32 *offset)
+{
+ int err;
+
+ *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+ if (IS_ERR(*umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ return PTR_ERR(*umem);
+ }
+
+ mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+
+ err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+
+ mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
+ addr, size, *npages, *page_shift, *ncont, *offset);
+
+ return 0;
+
+err_umem:
+ ib_umem_release(*umem);
+ *umem = NULL;
+
+ return err;
+}
+
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct ib_qp_init_attr *attr,
struct mlx5_create_qp_mbox_in **in,
- struct mlx5_ib_create_qp_resp *resp, int *inlen)
+ struct mlx5_ib_create_qp_resp *resp, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_create_qp ucmd;
+ struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
int page_shift = 0;
int uar_index;
int npages;
@@ -615,18 +667,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
/*
* TBD: should come from the verbs when we have the API
*/
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to medium latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+ uuarn = MLX5_CROSS_CHANNEL_UUAR;
+ else {
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to medium latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
- mlx5_ib_warn(dev, "uuar allocation failed\n");
- return uuarn;
+ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to high latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ if (uuarn < 0) {
+ mlx5_ib_warn(dev, "uuar allocation failed\n");
+ return uuarn;
+ }
}
}
}
@@ -638,32 +695,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- err = set_user_buf_size(dev, qp, &ucmd);
+ err = set_user_buf_size(dev, qp, &ucmd, base, attr);
if (err)
goto err_uuar;
- if (ucmd.buf_addr && qp->buf_size) {
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
- if (IS_ERR(qp->umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- err = PTR_ERR(qp->umem);
+ if (ucmd.buf_addr && ubuffer->buf_size) {
+ ubuffer->buf_addr = ucmd.buf_addr;
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
+ ubuffer->buf_size,
+ &ubuffer->umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
goto err_uuar;
- }
} else {
- qp->umem = NULL;
- }
-
- if (qp->umem) {
- mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
- mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
- ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+ ubuffer->umem = NULL;
}
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
@@ -672,8 +717,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_umem;
}
- if (qp->umem)
- mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ if (ubuffer->umem)
+ mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
+ (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -704,29 +750,31 @@ err_free:
kvfree(*in);
err_umem:
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (ubuffer->umem)
+ ib_umem_release(ubuffer->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
return err;
}
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (base->ubuffer.umem)
+ ib_umem_release(base->ubuffer.umem);
free_uuar(&context->uuari, qp->uuarn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
- struct mlx5_create_qp_mbox_in **in, int *inlen)
+ struct mlx5_create_qp_mbox_in **in, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
@@ -758,9 +806,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->rq.offset = 0;
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+ base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf);
+ err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
goto err_uuar;
@@ -853,19 +901,304 @@ static int is_connected(enum ib_qp_type qp_type)
return 0;
}
+static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, u32 tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, transport_domain, tdn);
+
+ return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+}
+
+static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+}
+
+static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, void *qpin,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
+ __be64 *pas;
+ void *in;
+ void *sqc;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ void *wq;
+ int inlen;
+ int err;
+ int page_shift = 0;
+ int npages;
+ int ncont = 0;
+ u32 offset = 0;
+
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
+ &sq->ubuffer.umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_umem;
+ }
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
+ MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+ MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, offset);
+
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+
+ err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+
+ kvfree(in);
+
+ if (err)
+ goto err_umem;
+
+ return 0;
+
+err_umem:
+ ib_umem_release(sq->ubuffer.umem);
+ sq->ubuffer.umem = NULL;
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+ ib_umem_release(sq->ubuffer.umem);
+}
+
+static int get_rq_pas_size(void *qpc)
+{
+ u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
+ u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
+ u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
+ u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
+
+ return rq_num_pas * sizeof(u64);
+}
+
+static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, void *qpin)
+{
+ __be64 *pas;
+ __be64 *qp_pas;
+ void *in;
+ void *rqc;
+ void *wq;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ int inlen;
+ int err;
+ u32 rq_pas_size = get_rq_pas_size(qpc);
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, vsd, 1);
+ MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, end_padding_mode,
+ MLX5_GET(qpc, qpc, end_padding_mode));
+ MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+ MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
+
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
+ memcpy(pas, qp_pas, rq_pas_size);
+
+ err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+}
+
+static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, u32 tdn)
+{
+ u32 *in;
+ void *tirc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+ err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+}
+
+static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_mbox_in *in,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct ib_uobject *uobj = pd->uobject;
+ struct ib_ucontext *ucontext = uobj->context;
+ struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+ int err;
+ u32 tdn = mucontext->tdn;
+
+ if (qp->sq.wqe_cnt) {
+ err = create_raw_packet_qp_tis(dev, sq, tdn);
+ if (err)
+ return err;
+
+ err = create_raw_packet_qp_sq(dev, sq, in, pd);
+ if (err)
+ goto err_destroy_tis;
+
+ sq->base.container_mibqp = qp;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = create_raw_packet_qp_rq(dev, rq, in);
+ if (err)
+ goto err_destroy_sq;
+
+ rq->base.container_mibqp = qp;
+
+ err = create_raw_packet_qp_tir(dev, rq, tdn);
+ if (err)
+ goto err_destroy_rq;
+ }
+
+ qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
+ rq->base.mqp.qpn;
+
+ return 0;
+
+err_destroy_rq:
+ destroy_raw_packet_qp_rq(dev, rq);
+err_destroy_sq:
+ if (!qp->sq.wqe_cnt)
+ return err;
+ destroy_raw_packet_qp_sq(dev, sq);
+err_destroy_tis:
+ destroy_raw_packet_qp_tis(dev, sq);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ if (qp->rq.wqe_cnt) {
+ destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_rq(dev, rq);
+ }
+
+ if (qp->sq.wqe_cnt) {
+ destroy_raw_packet_qp_sq(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq);
+ }
+}
+
+static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp)
+{
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ sq->sq = &qp->sq;
+ rq->rq = &qp->rq;
+ sq->doorbell = &qp->db;
+ rq->doorbell = &qp->db;
+}
+
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_resources *devr = &dev->devr;
struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp_base *base;
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
struct mlx5_ib_create_qp ucmd;
int inlen = sizeof(*in);
int err;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ void *qpc;
+
+ base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
- mlx5_ib_odp_create_qp(qp);
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ mlx5_ib_odp_create_qp(qp);
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
@@ -880,6 +1213,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
}
+ if (init_attr->create_flags &
+ (IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) {
+ if (!MLX5_CAP_GEN(mdev, cd)) {
+ mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
+ return -EINVAL;
+ }
+ if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+ qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+ qp->flags |= MLX5_IB_QP_MANAGED_SEND;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+ qp->flags |= MLX5_IB_QP_MANAGED_RECV;
+ }
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
@@ -889,6 +1237,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return -EFAULT;
}
+ err = get_qp_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
} else {
@@ -918,11 +1271,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
ucmd.sq_wqe_count, max_wqes);
return -EINVAL;
}
- err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+ err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
+ &resp, &inlen, base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
} else {
- err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+ err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
+ base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
}
@@ -954,6 +1309,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
int scqe_sz;
@@ -1018,26 +1380,35 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
- err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(qpc, qpc, user_index, uidx);
+ }
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+ raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+ err = create_raw_packet_qp(dev, qp, in, pd);
+ } else {
+ err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+ }
+
if (err) {
mlx5_ib_dbg(dev, "create qp failed\n");
goto err_create;
}
kvfree(in);
- /* Hardware wants QPN written in big-endian order (after
- * shifting) for send doorbell. Precompute this value to save
- * a little bit when posting sends.
- */
- qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = mlx5_ib_qp_event;
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
return 0;
err_create:
if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(pd, qp);
+ destroy_qp_user(pd, qp, base);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
@@ -1129,11 +1500,11 @@ static void get_cqs(struct mlx5_ib_qp *qp,
case IB_QPT_UD:
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
+ case IB_QPT_RAW_PACKET:
*send_cq = to_mcq(qp->ibqp.send_cq);
*recv_cq = to_mcq(qp->ibqp.recv_cq);
break;
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
*send_cq = NULL;
@@ -1142,45 +1513,66 @@ static void get_cqs(struct mlx5_ib_qp *qp,
}
}
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation);
+
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_modify_qp_mbox_in *in;
int err;
+ base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return;
if (qp->state != IB_QPS_RESET) {
- mlx5_ib_qp_disable_pagefaults(qp);
- if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
- MLX5_QP_STATE_RST, in, 0, &qp->mqp))
- mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
- qp->mqp.qpn);
+ if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_qp_disable_pagefaults(qp);
+ err = mlx5_core_qp_modify(dev->mdev,
+ MLX5_CMD_OP_2RST_QP, in, 0,
+ &base->mqp);
+ } else {
+ err = modify_raw_packet_qp(dev, qp,
+ MLX5_CMD_OP_2RST_QP);
+ }
+ if (err)
+ mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
+ base->mqp.qpn);
}
get_cqs(qp, &send_cq, &recv_cq);
if (qp->create_type == MLX5_QP_KERNEL) {
mlx5_ib_lock_cqs(send_cq, recv_cq);
- __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
- __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
+ NULL);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
}
- err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
- if (err)
- mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
- kfree(in);
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ destroy_raw_packet_qp(dev, qp);
+ } else {
+ err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+ if (err)
+ mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
+ base->mqp.qpn);
+ }
+ kfree(in);
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(&get_pd(qp)->ibpd, qp);
+ destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -1225,6 +1617,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
if (pd) {
dev = to_mdev(pd->device);
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (!pd->uobject) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
+ return ERR_PTR(-EINVAL);
+ } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
} else {
/* being cautious here */
if (init_attr->qp_type != IB_QPT_XRC_TGT &&
@@ -1250,6 +1652,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
}
/* fall through */
+ case IB_QPT_RAW_PACKET:
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
@@ -1272,19 +1675,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
else if (is_qp1(init_attr->qp_type))
qp->ibqp.qp_num = 1;
else
- qp->ibqp.qp_num = qp->mqp.qpn;
+ qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
- qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+ qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+ to_mcq(init_attr->recv_cq)->mcq.cqn,
to_mcq(init_attr->send_cq)->mcq.cqn);
- qp->xrcdn = xrcdn;
+ qp->trans_qp.xrcdn = xrcdn;
break;
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
mlx5_ib_dbg(dev, "unsupported qp type %d\n",
@@ -1318,12 +1721,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
else
- dest_rd_atomic = qp->resp_depth;
+ dest_rd_atomic = qp->trans_qp.resp_depth;
if (attr_mask & IB_QP_ACCESS_FLAGS)
access_flags = attr->qp_access_flags;
else
- access_flags = qp->atomic_rd_en;
+ access_flags = qp->trans_qp.atomic_rd_en;
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
@@ -1360,21 +1763,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
return rate + MLX5_STAT_RATE_OFFSET;
}
-static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, u8 sl)
+{
+ void *in;
+ void *tisc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+
+ tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+ MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
+
+ err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
int err;
- path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
- path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
-
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = attr->pkey_index;
- path->grh_mlid = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
-
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >=
dev->mdev->port_caps[port - 1].gid_table_len) {
@@ -1383,7 +1807,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
dev->mdev->port_caps[port - 1].gid_table_len);
return -EINVAL;
}
- path->grh_mlid |= 1 << 7;
+ }
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ if (!(ah->ah_flags & IB_AH_GRH))
+ return -EINVAL;
+ memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+ path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
+ ah->grh.sgid_index);
+ path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ } else {
+ path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+ path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
+ 0;
+ path->rlid = cpu_to_be16(ah->dlid);
+ path->grh_mlid = ah->src_path_bits & 0x7f;
+ if (ah->ah_flags & IB_AH_GRH)
+ path->grh_mlid |= 1 << 7;
+ path->dci_cfi_prio_sl = ah->sl & 0xf;
+ }
+
+ if (ah->ah_flags & IB_AH_GRH) {
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
path->tclass_flowlabel =
@@ -1401,7 +1845,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
if (attr_mask & IB_QP_TIMEOUT)
path->ackto_lt = attr->timeout << 3;
- path->sl = ah->sl & 0xf;
+ if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+ return modify_raw_packet_eth_prio(dev->mdev,
+ &qp->raw_packet_qp.sq,
+ ah->sl & 0xf);
return 0;
}
@@ -1549,12 +1996,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
return result;
}
+static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_rq *rq, int new_state)
+{
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, state, new_state);
+
+ err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ rq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, int new_state)
+{
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_sq_in, in, sq_state, sq->state);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, new_state);
+
+ err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ sq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ int rq_state;
+ int sq_state;
+ int err;
+
+ switch (operation) {
+ case MLX5_CMD_OP_RST2INIT_QP:
+ rq_state = MLX5_RQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RDY;
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ rq_state = MLX5_RQC_STATE_ERR;
+ sq_state = MLX5_SQC_STATE_ERR;
+ break;
+ case MLX5_CMD_OP_2RST_QP:
+ rq_state = MLX5_RQC_STATE_RST;
+ sq_state = MLX5_SQC_STATE_RST;
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ /* Nothing to do here... */
+ return 0;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->sq.wqe_cnt)
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+
+ return 0;
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
+ static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+ [MLX5_QP_STATE_RST] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
+ },
+ [MLX5_QP_STATE_INIT] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
+ [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
+ },
+ [MLX5_QP_STATE_RTR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
+ },
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
+ },
+ [MLX5_QP_STATE_SQD] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ },
+ [MLX5_QP_STATE_SQER] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
+ },
+ [MLX5_QP_STATE_ERR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ }
+ };
+
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
struct mlx5_modify_qp_mbox_in *in;
@@ -1564,6 +2153,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int sqd_event;
int mlx5_st;
int err;
+ u16 op;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
@@ -1623,7 +2213,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.port = attr->port_num;
if (attr_mask & IB_QP_AV) {
- err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+ err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
attr_mask, 0, attr);
if (err)
@@ -1634,7 +2224,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.ackto_lt |= attr->timeout << 3;
if (attr_mask & IB_QP_ALT_PATH) {
- err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
+ &context->alt_path,
attr->alt_port_num, attr_mask, 0, attr);
if (err)
goto out;
@@ -1706,41 +2297,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
* again to RTS, and may cause the driver and the device to get out of
* sync. */
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
- (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+ (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_disable_pagefaults(qp);
+ if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
+ !optab[mlx5_cur][mlx5_new])
+ goto out;
+
+ op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
in->optparam = cpu_to_be32(optpar);
- err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state),
- to_mlx5_state(new_state), in, sqd_event,
- &qp->mqp);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+ err = modify_raw_packet_qp(dev, qp, op);
+ else
+ err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+ &base->mqp);
if (err)
goto out;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_enable_pagefaults(qp);
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
- qp->atomic_rd_en = attr->qp_access_flags;
+ qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- qp->resp_depth = attr->max_dest_rd_atomic;
+ qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
if (attr_mask & IB_QP_PORT)
qp->port = attr->port_num;
if (attr_mask & IB_QP_ALT_PATH)
- qp->alt_port = attr->alt_port_num;
+ qp->trans_qp.alt_port = attr->alt_port_num;
/*
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
- mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
qp->rq.head = 0;
qp->rq.tail = 0;
@@ -1765,15 +2366,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
int port;
+ enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+ if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
+ port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
+ }
+
if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_UNSPECIFIED))
+ ll))
goto out;
if ((attr_mask & IB_QP_PORT) &&
@@ -1838,9 +2445,9 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
struct ib_send_wr *wr)
{
- memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
- dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
- dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
+ dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
+ dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
}
static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
@@ -1896,22 +2503,24 @@ static __be64 sig_mkey_mask(void)
return cpu_to_be64(result);
}
-static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr, int li)
+static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct mlx5_ib_mr *mr)
{
- memset(umr, 0, sizeof(*umr));
-
- if (li) {
- umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
- umr->flags = 1 << 7;
- return;
- }
+ int ndescs = mr->ndescs;
- umr->flags = (1 << 5); /* fail if not free */
- umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
+ memset(umr, 0, sizeof(*umr));
+ umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+ umr->klm_octowords = get_klm_octo(ndescs);
umr->mkey_mask = frwr_mkey_mask();
}
+static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
+{
+ memset(umr, 0, sizeof(*umr));
+ umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+ umr->flags = 1 << 7;
+}
+
static __be64 get_umr_reg_mr_mask(void)
{
u64 result;
@@ -1952,7 +2561,7 @@ static __be64 get_umr_update_mtt_mask(void)
static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
struct ib_send_wr *wr)
{
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(umr, 0, sizeof(*umr));
@@ -1987,29 +2596,31 @@ static u8 get_umr_flags(int acc)
MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
}
-static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
- int li, int *writ)
+static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
+ struct mlx5_ib_mr *mr,
+ u32 key, int access)
{
- memset(seg, 0, sizeof(*seg));
- if (li) {
- seg->status = MLX5_MKEY_STATUS_FREE;
- return;
- }
+ int ndescs = ALIGN(mr->ndescs, 8) >> 1;
- seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
- MLX5_ACCESS_MODE_MTT;
- *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
- seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
+ memset(seg, 0, sizeof(*seg));
+ seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT;
+ seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
- seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
- seg->len = cpu_to_be64(wr->wr.fast_reg.length);
- seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
- seg->log2_page_size = wr->wr.fast_reg.page_shift;
+ seg->start_addr = cpu_to_be64(mr->ibmr.iova);
+ seg->len = cpu_to_be64(mr->ibmr.length);
+ seg->xlt_oct_size = cpu_to_be32(ndescs);
+ seg->log2_page_size = ilog2(mr->ibmr.page_size);
+}
+
+static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
+{
+ memset(seg, 0, sizeof(*seg));
+ seg->status = MLX5_MKEY_STATUS_FREE;
}
static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
{
- struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+ struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
@@ -2028,21 +2639,14 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
mlx5_mkey_variant(umrwr->mkey));
}
-static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
- struct ib_send_wr *wr,
- struct mlx5_core_dev *mdev,
- struct mlx5_ib_pd *pd,
- int writ)
+static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
+ struct mlx5_ib_mr *mr,
+ struct mlx5_ib_pd *pd)
{
- struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
- u64 *page_list = wr->wr.fast_reg.page_list->page_list;
- u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
- int i;
+ int bcount = mr->desc_size * mr->ndescs;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
- mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
- dseg->addr = cpu_to_be64(mfrpl->map);
- dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
+ dseg->addr = cpu_to_be64(mr->desc_map);
+ dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
}
@@ -2224,22 +2828,22 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
return 0;
}
-static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
- void **seg, int *size)
+static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
+ struct mlx5_ib_qp *qp, void **seg, int *size)
{
- struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs;
- struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
+ struct ib_mr *sig_mr = wr->sig_mr;
struct mlx5_bsf *bsf;
- u32 data_len = wr->sg_list->length;
- u32 data_key = wr->sg_list->lkey;
- u64 data_va = wr->sg_list->addr;
+ u32 data_len = wr->wr.sg_list->length;
+ u32 data_key = wr->wr.sg_list->lkey;
+ u64 data_va = wr->wr.sg_list->addr;
int ret;
int wqe_size;
- if (!wr->wr.sig_handover.prot ||
- (data_key == wr->wr.sig_handover.prot->lkey &&
- data_va == wr->wr.sig_handover.prot->addr &&
- data_len == wr->wr.sig_handover.prot->length)) {
+ if (!wr->prot ||
+ (data_key == wr->prot->lkey &&
+ data_va == wr->prot->addr &&
+ data_len == wr->prot->length)) {
/**
* Source domain doesn't contain signature information
* or data and protection are interleaved in memory.
@@ -2273,8 +2877,8 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
struct mlx5_stride_block_entry *data_sentry;
struct mlx5_stride_block_entry *prot_sentry;
- u32 prot_key = wr->wr.sig_handover.prot->lkey;
- u64 prot_va = wr->wr.sig_handover.prot->addr;
+ u32 prot_key = wr->prot->lkey;
+ u64 prot_va = wr->prot->addr;
u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
int prot_size;
@@ -2326,16 +2930,16 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
}
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- struct ib_send_wr *wr, u32 nelements,
+ struct ib_sig_handover_wr *wr, u32 nelements,
u32 length, u32 pdn)
{
- struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ struct ib_mr *sig_mr = wr->sig_mr;
u32 sig_key = sig_mr->rkey;
u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
memset(seg, 0, sizeof(*seg));
- seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
+ seg->flags = get_umr_flags(wr->access_flags) |
MLX5_ACCESS_MODE_KLM;
seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
@@ -2346,7 +2950,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
}
static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr, u32 nelements)
+ u32 nelements)
{
memset(umr, 0, sizeof(*umr));
@@ -2357,37 +2961,37 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
}
-static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
void **seg, int *size)
{
- struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr);
+ struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
u32 pdn = get_pd(qp)->pdn;
u32 klm_oct_size;
int region_len, ret;
- if (unlikely(wr->num_sge != 1) ||
- unlikely(wr->wr.sig_handover.access_flags &
- IB_ACCESS_REMOTE_ATOMIC) ||
+ if (unlikely(wr->wr.num_sge != 1) ||
+ unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
unlikely(!sig_mr->sig->sig_status_checked))
return -EINVAL;
/* length of the protected region, data + protection */
- region_len = wr->sg_list->length;
- if (wr->wr.sig_handover.prot &&
- (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey ||
- wr->wr.sig_handover.prot->addr != wr->sg_list->addr ||
- wr->wr.sig_handover.prot->length != wr->sg_list->length))
- region_len += wr->wr.sig_handover.prot->length;
+ region_len = wr->wr.sg_list->length;
+ if (wr->prot &&
+ (wr->prot->lkey != wr->wr.sg_list->lkey ||
+ wr->prot->addr != wr->wr.sg_list->addr ||
+ wr->prot->length != wr->wr.sg_list->length))
+ region_len += wr->prot->length;
/**
* KLM octoword size - if protection was provided
* then we use strided block format (3 octowords),
* else we use single KLM (1 octoword)
**/
- klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1;
+ klm_oct_size = wr->prot ? 3 : 1;
- set_sig_umr_segment(*seg, wr, klm_oct_size);
+ set_sig_umr_segment(*seg, klm_oct_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
@@ -2433,38 +3037,52 @@ static int set_psv_wr(struct ib_sig_domain *domain,
return 0;
}
-static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
- struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
+static int set_reg_wr(struct mlx5_ib_qp *qp,
+ struct ib_reg_wr *wr,
+ void **seg, int *size)
{
- int writ = 0;
- int li;
+ struct mlx5_ib_mr *mr = to_mmr(wr->mr);
+ struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
- if (unlikely(wr->send_flags & IB_SEND_INLINE))
+ if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
+ mlx5_ib_warn(to_mdev(qp->ibqp.device),
+ "Invalid IB_SEND_INLINE send flag\n");
return -EINVAL;
+ }
- set_frwr_umr_segment(*seg, wr, li);
+ set_reg_umr_seg(*seg, mr);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
- set_mkey_segment(*seg, wr, li, &writ);
+
+ set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
- if (!li) {
- if (unlikely(wr->wr.fast_reg.page_list_len >
- wr->wr.fast_reg.page_list->max_page_list_len))
- return -ENOMEM;
- set_frwr_pages(*seg, wr, mdev, pd, writ);
- *seg += sizeof(struct mlx5_wqe_data_seg);
- *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
- }
+ set_reg_data_seg(*seg, mr, pd);
+ *seg += sizeof(struct mlx5_wqe_data_seg);
+ *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+
return 0;
}
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
+{
+ set_linv_umr_seg(*seg);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+ set_linv_mkey_seg(*seg);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+}
+
static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
{
__be32 *p = NULL;
@@ -2559,7 +3177,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
mlx5_opcode | ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
ctrl->fm_ce_se |= fence;
qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
@@ -2578,7 +3196,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
@@ -2627,7 +3244,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (ibqp->qp_type) {
case IB_QPT_XRC_INI:
xrc = seg;
- xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
seg += sizeof(*xrc);
size += sizeof(*xrc) / 16;
/* fall through */
@@ -2636,8 +3252,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
seg += sizeof(struct mlx5_wqe_raddr_seg);
size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
break;
@@ -2654,22 +3270,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
+ set_linv_wr(qp, &seg, &size);
num_sge = 0;
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
- ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
- err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+ qp->sq.wr_data[idx] = IB_WR_REG_MR;
+ ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
+ err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
if (err) {
- mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
@@ -2678,7 +3288,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_REG_SIG_MR:
qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
- mr = to_mmr(wr->wr.sig_handover.sig_mr);
+ mr = to_mmr(sig_handover_wr(wr)->sig_mr);
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
err = set_sig_umr_wr(wr, qp, &seg, &size);
@@ -2706,7 +3316,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem,
+ err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
mr->sig->psv_memory.psv_idx, &seg,
&size);
if (err) {
@@ -2728,7 +3338,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire,
+ err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
mr->sig->psv_wire.psv_idx, &seg,
&size);
if (err) {
@@ -2752,8 +3362,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
seg += sizeof(struct mlx5_wqe_raddr_seg);
size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
break;
@@ -2780,7 +3390,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
- ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+ ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
set_reg_umr_segment(seg, wr);
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
@@ -3000,7 +3610,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
return;
- ib_ah_attr->sl = path->sl & 0xf;
+ ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
@@ -3018,39 +3628,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
}
}
-int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
+static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq,
+ u8 *sq_state)
+{
+ void *out;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+ *sq_state = MLX5_GET(sqc, sqc, state);
+ sq->state = *sq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq,
+ u8 *rq_state)
+{
+ void *out;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_rq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+ *rq_state = MLX5_GET(rqc, rqc, state);
+ rq->state = *rq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
+ struct mlx5_ib_qp *qp, u8 *qp_state)
+{
+ static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
+ [MLX5_RQC_STATE_RST] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD,
+ [MLX5_SQ_STATE_NA] = IB_QPS_RESET,
+ },
+ [MLX5_RQC_STATE_RDY] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
+ },
+ [MLX5_RQC_STATE_ERR] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_ERR,
+ [MLX5_SQ_STATE_NA] = IB_QPS_ERR,
+ },
+ [MLX5_RQ_STATE_NA] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
+ },
+ };
+
+ *qp_state = sqrq_trans[rq_state][sq_state];
+
+ if (*qp_state == MLX5_QP_STATE_BAD) {
+ WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
+ qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
+ qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
+ return -EINVAL;
+ }
+
+ if (*qp_state == MLX5_QP_STATE)
+ *qp_state = qp->state;
+
+ return 0;
+}
+
+static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ u8 *raw_packet_qp_state)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ int err;
+ u8 sq_state = MLX5_SQ_STATE_NA;
+ u8 rq_state = MLX5_RQ_STATE_NA;
+
+ if (qp->sq.wqe_cnt) {
+ err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
+ if (err)
+ return err;
+ }
+
+ return sqrq_state_to_qp_state(sq_state, rq_state, qp,
+ raw_packet_qp_state);
+}
+
+static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_attr *qp_attr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_query_qp_mbox_out *outb;
struct mlx5_qp_context *context;
int mlx5_state;
int err = 0;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * Wait for any outstanding page faults, in case the user frees memory
- * based upon this query's result.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
- mutex_lock(&qp->mutex);
outb = kzalloc(sizeof(*outb), GFP_KERNEL);
- if (!outb) {
- err = -ENOMEM;
- goto out;
- }
+ if (!outb)
+ return -ENOMEM;
+
context = &outb->ctx;
- err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
+ err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
+ sizeof(*outb));
if (err)
- goto out_free;
+ goto out;
mlx5_state = be32_to_cpu(context->flags) >> 28;
qp->state = to_ib_qp_state(mlx5_state);
- qp_attr->qp_state = qp->state;
qp_attr->path_mtu = context->mtu_msgmax >> 5;
qp_attr->path_mig_state =
to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -3084,6 +3808,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
+
+out:
+ kfree(outb);
+ return err;
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ int err = 0;
+ u8 raw_packet_qp_state;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /*
+ * Wait for any outstanding page faults, in case the user frees memory
+ * based upon this query's result.
+ */
+ flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
+ if (err)
+ goto out;
+ qp->state = raw_packet_qp_state;
+ qp_attr->port_num = 1;
+ } else {
+ err = query_qp_attr(dev, qp, qp_attr);
+ if (err)
+ goto out;
+ }
+
+ qp_attr->qp_state = qp->state;
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
qp_attr->cap.max_recv_sge = qp->rq.max_gs;
@@ -3107,12 +3868,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
-out_free:
- kfree(outb);
-
out:
mutex_unlock(&qp->mutex);
return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index e008505..3b2ddd6 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -75,31 +75,42 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in,
- struct ib_udata *udata, int buf_size, int *inlen)
+ struct ib_udata *udata, int buf_size, int *inlen,
+ int is_xrc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_ib_create_srq ucmd;
+ struct mlx5_ib_create_srq ucmd = {};
size_t ucmdlen;
+ void *xsrqc;
int err;
int npages;
int page_shift;
int ncont;
u32 offset;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
- ucmdlen =
- (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
- sizeof(ucmd)) ? (sizeof(ucmd) -
- sizeof(ucmd.reserved)) : sizeof(ucmd);
+ ucmdlen = min(udata->inlen, sizeof(ucmd));
if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
return -EFAULT;
}
- if (ucmdlen == sizeof(ucmd) &&
- ucmd.reserved != 0)
+ if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EINVAL;
+
+ if (is_xrc) {
+ err = get_srq_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+ }
+
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -138,6 +149,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+ if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+ is_xrc){
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
+ }
+
return 0;
err_in:
@@ -151,13 +169,14 @@ err_umem:
static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in, int buf_size,
- int *inlen)
+ int *inlen, int is_xrc)
{
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
+ void *xsrqc;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -204,6 +223,14 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+ is_xrc){
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
+ }
+
return 0;
err_in:
@@ -275,10 +302,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
+ is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+
if (pd->uobject)
- err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+ err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
+ is_xrc);
else
- err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+ err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
+ is_xrc);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -286,7 +317,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
goto err_srq;
}
- is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
in->ctx.state_log_sz = ilog2(srq->msrq.max);
flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
xrcdn = 0;
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 76fb7b9..b94a554 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -35,6 +35,8 @@
#include <linux/types.h>
+#include "mlx5_ib.h"
+
enum {
MLX5_QP_FLAG_SIGNATURE = 1 << 0,
MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
@@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
__u32 total_num_uuars;
__u32 num_low_latency_uuars;
__u32 flags;
- __u32 reserved;
+ __u32 comp_mask;
+ __u8 max_cqe_version;
+ __u8 reserved0;
+ __u16 reserved1;
+ __u32 reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};
struct mlx5_ib_alloc_ucontext_resp {
@@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp {
__u32 max_recv_wr;
__u32 max_srq_recv_wr;
__u16 num_ports;
- __u16 reserved;
+ __u16 reserved1;
+ __u32 comp_mask;
+ __u32 response_length;
+ __u8 cqe_version;
+ __u8 reserved2;
+ __u16 reserved3;
+ __u64 hca_core_clock_offset;
};
struct mlx5_ib_alloc_pd_resp {
@@ -110,7 +126,9 @@ struct mlx5_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
__u32 flags;
- __u32 reserved; /* explicit padding (optional on i386) */
+ __u32 reserved0; /* explicit padding (optional on i386) */
+ __u32 uidx;
+ __u32 reserved1;
};
struct mlx5_ib_create_srq_resp {
@@ -125,9 +143,48 @@ struct mlx5_ib_create_qp {
__u32 rq_wqe_count;
__u32 rq_wqe_shift;
__u32 flags;
+ __u32 uidx;
+ __u32 reserved0;
+ __u64 sq_buf_addr;
};
struct mlx5_ib_create_qp_resp {
__u32 uuar_index;
};
+
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_qp *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+
+ if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+
+ if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_srq *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+
+ if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+
+ if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
#endif /* MLX5_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 32f6c63..bcac294 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -281,7 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
ib_get_cached_gid(&dev->ib_dev,
be32_to_cpu(ah->av->port_pd) >> 24,
ah->av->gid_index % dev->limits.gid_table_len,
- &header->grh.source_gid);
+ &header->grh.source_gid, NULL);
memcpy(header->grh.destination_gid.raw,
ah->av->dgid, 16);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 40ba833..a6531ff 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -608,9 +608,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
entry->opcode = IB_WC_FETCH_ADD;
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
- case MTHCA_OPCODE_BIND_MW:
- entry->opcode = IB_WC_BIND_MW;
- break;
default:
entry->opcode = MTHCA_OPCODE_INVALID;
break;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index dc2d48c..9866c35 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -898,89 +898,6 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
return &mr->ibmr;
}
-static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start)
-{
- struct mthca_mr *mr;
- u64 *page_list;
- u64 total_size;
- unsigned long mask;
- int shift;
- int npages;
- int err;
- int i, j, n;
-
- mask = buffer_list[0].addr ^ *iova_start;
- total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0)
- mask |= buffer_list[i].addr;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
-
- total_size += buffer_list[i].size;
- }
-
- if (mask & ~PAGE_MASK)
- return ERR_PTR(-EINVAL);
-
- shift = __ffs(mask | 1 << 31);
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
- buffer_list[0].addr &= ~0ull << shift;
-
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
-
- npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-
- if (!npages)
- return &mr->ibmr;
-
- page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
- if (!page_list) {
- kfree(mr);
- return ERR_PTR(-ENOMEM);
- }
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
- ++j)
- page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
-
- mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
- "in PD %x; shift %d, npages %d.\n",
- (unsigned long long) buffer_list[0].addr,
- (unsigned long long) *iova_start,
- to_mpd(pd)->pd_num,
- shift, npages);
-
- err = mthca_mr_alloc_phys(to_mdev(pd->device),
- to_mpd(pd)->pd_num,
- page_list, shift, npages,
- *iova_start, total_size,
- convert_access(acc), mr);
-
- if (err) {
- kfree(page_list);
- kfree(mr);
- return ERR_PTR(err);
- }
-
- kfree(page_list);
- mr->umem = NULL;
-
- return &mr->ibmr;
-}
-
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
@@ -1346,7 +1263,6 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
- dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
dev->ib_dev.get_port_immutable = mthca_port_immutable;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index e354b2f..96e5fb9 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1476,7 +1476,7 @@ void mthca_free_qp(struct mthca_dev *dev,
/* Create UD header for an MLX send and build a data segment for it */
static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
- int ind, struct ib_send_wr *wr,
+ int ind, struct ib_ud_wr *wr,
struct mthca_mlx_seg *mlx,
struct mthca_data_seg *data)
{
@@ -1485,10 +1485,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
u16 pkey;
ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
- mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0,
+ mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
&sqp->ud_header);
- err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
+ err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
if (err)
return err;
mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
@@ -1499,7 +1499,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
mlx->rlid = sqp->ud_header.lrh.destination_lid;
mlx->vcrc = 0;
- switch (wr->opcode) {
+ switch (wr->wr.opcode) {
case IB_WR_SEND:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
@@ -1507,7 +1507,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
case IB_WR_SEND_WITH_IMM:
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->ex.imm_data;
+ sqp->ud_header.immediate_data = wr->wr.ex.imm_data;
break;
default:
return -EINVAL;
@@ -1516,18 +1516,18 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
- sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
if (!sqp->qp.ibqp.qp_num)
ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
sqp->pkey_index, &pkey);
else
ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
- wr->wr.ud.pkey_index, &pkey);
+ wr->pkey_index, &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
- sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
- sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
- sqp->qkey : wr->wr.ud.remote_qkey);
+ sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
+ sqp->qkey : wr->remote_qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header,
@@ -1569,34 +1569,34 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
}
static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
- struct ib_send_wr *wr)
+ struct ib_atomic_wr *wr)
{
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->swap);
+ aseg->compare = cpu_to_be64(wr->compare_add);
} else {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->swap_add = cpu_to_be64(wr->compare_add);
aseg->compare = 0;
}
}
static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
- struct ib_send_wr *wr)
+ struct ib_ud_wr *wr)
{
- useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
- useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
- useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
- useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ useg->lkey = cpu_to_be32(to_mah(wr->ah)->key);
+ useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma);
+ useg->dqpn = cpu_to_be32(wr->remote_qpn);
+ useg->qkey = cpu_to_be32(wr->remote_qkey);
}
static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
- struct ib_send_wr *wr)
+ struct ib_ud_wr *wr)
{
- memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
- useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
- useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
+ useg->dqpn = cpu_to_be32(wr->remote_qpn);
+ useg->qkey = cpu_to_be32(wr->remote_qkey);
}
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
@@ -1664,11 +1664,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
- set_atomic_seg(wqe, wr);
+ set_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mthca_atomic_seg);
size += (sizeof (struct mthca_raddr_seg) +
sizeof (struct mthca_atomic_seg)) / 16;
@@ -1677,8 +1677,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
case IB_WR_RDMA_READ:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -1694,8 +1694,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -1708,13 +1708,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case UD:
- set_tavor_ud_seg(wqe, wr);
+ set_tavor_ud_seg(wqe, ud_wr(wr));
wqe += sizeof (struct mthca_tavor_ud_seg);
size += sizeof (struct mthca_tavor_ud_seg) / 16;
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+ err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
wqe - sizeof (struct mthca_next_seg),
wqe);
if (err) {
@@ -2005,11 +2005,11 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
- set_atomic_seg(wqe, wr);
+ set_atomic_seg(wqe, atomic_wr(wr));
wqe += sizeof (struct mthca_atomic_seg);
size += (sizeof (struct mthca_raddr_seg) +
sizeof (struct mthca_atomic_seg)) / 16;
@@ -2018,8 +2018,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -2035,8 +2035,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
- wr->wr.rdma.rkey);
+ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+ rdma_wr(wr)->rkey);
wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -2049,13 +2049,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case UD:
- set_arbel_ud_seg(wqe, wr);
+ set_arbel_ud_seg(wqe, ud_wr(wr));
wqe += sizeof (struct mthca_arbel_ud_seg);
size += sizeof (struct mthca_arbel_ud_seg) / 16;
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+ err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
wqe - sizeof (struct mthca_next_seg),
wqe);
if (err) {
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 8a3ad17..cb9f0f2 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -134,7 +134,7 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
/* External CM API Interface */
/* instance of function pointers for client API */
/* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static struct nes_cm_ops nes_cm_api = {
+static const struct nes_cm_ops nes_cm_api = {
mini_cm_accelerated,
mini_cm_listen,
mini_cm_del_listen,
@@ -3232,7 +3232,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
int passive_state;
struct nes_ib_device *nesibdev;
struct ib_mr *ibmr = NULL;
- struct ib_phys_buf ibphysbuf;
struct nes_pd *nespd;
u64 tagged_offset;
u8 mpa_frame_offset = 0;
@@ -3316,21 +3315,19 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
u64temp = (unsigned long)nesqp;
nesibdev = nesvnic->nesibdev;
nespd = nesqp->nespd;
- ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
- ibphysbuf.size = buff_len;
tagged_offset = (u64)(unsigned long)*start_buff;
- ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
- &ibphysbuf, 1,
- IB_ACCESS_LOCAL_WRITE,
- &tagged_offset);
- if (!ibmr) {
+ ibmr = nes_reg_phys_mr(&nespd->ibpd,
+ nesqp->ietf_frame_pbase + mpa_frame_offset,
+ buff_len, IB_ACCESS_LOCAL_WRITE,
+ &tagged_offset);
+ if (IS_ERR(ibmr)) {
nes_debug(NES_DBG_CM, "Unable to register memory region"
"for lSMM for cm_node = %p \n",
cm_node);
pci_free_consistent(nesdev->pcidev,
nesqp->private_data_len + nesqp->ietf_frame_size,
nesqp->ietf_frame, nesqp->ietf_frame_pbase);
- return -ENOMEM;
+ return PTR_ERR(ibmr);
}
ibmr->pd = &nespd->ibpd;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 32a6420..147c2c8 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -423,7 +423,7 @@ struct nes_cm_core {
struct timer_list tcp_timer;
- struct nes_cm_ops *api;
+ const struct nes_cm_ops *api;
int (*post_event)(struct nes_cm_event *event);
atomic_t events_posted;
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index d748e4b..c908020 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1200,12 +1200,6 @@ struct nes_fast_mr_wqe_pbl {
dma_addr_t paddr;
};
-struct nes_ib_fast_reg_page_list {
- struct ib_fast_reg_page_list ibfrpl;
- struct nes_fast_mr_wqe_pbl nes_wqe_pbl;
- u64 pbl;
-};
-
struct nes_listener {
struct work_struct work;
struct workqueue_struct *wq;
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 2042c0f..6d3a169 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -727,7 +727,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
if (action == NES_ARP_DELETE) {
nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
nesadapter->arp_table[arp_index].ip_addr = 0;
- memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN);
+ eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
return arp_index;
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 44cb513..8c4daf7 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -51,6 +51,7 @@ atomic_t qps_created;
atomic_t sw_qps_destroyed;
static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
+static int nes_dereg_mr(struct ib_mr *ib_mr);
/**
* nes_alloc_mw
@@ -205,80 +206,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
}
-/**
- * nes_bind_mw
- */
-static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
- struct ib_mw_bind *ibmw_bind)
-{
- u64 u64temp;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- /* struct nes_mr *nesmr = to_nesmw(ibmw); */
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_hw_qp_wqe *wqe;
- unsigned long flags = 0;
- u32 head;
- u32 wqe_misc = 0;
- u32 qsize;
-
- if (nesqp->ibqp_state > IB_QPS_RTS)
- return -EINVAL;
-
- spin_lock_irqsave(&nesqp->lock, flags);
-
- head = nesqp->hwqp.sq_head;
- qsize = nesqp->hwqp.sq_tail;
-
- /* Check for SQ overflow */
- if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- spin_unlock_irqrestore(&nesqp->lock, flags);
- return -ENOMEM;
- }
-
- wqe = &nesqp->hwqp.sq_vbase[head];
- /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
- nes_fill_init_qp_wqe(wqe, nesqp, head);
- u64temp = ibmw_bind->wr_id;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
- wqe_misc = NES_IWARP_SQ_OP_BIND;
-
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
- if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
- wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
- if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
- wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
- if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
- wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
- ibmw_bind->bind_info.mr->lkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
- ibmw_bind->bind_info.length);
- wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
- u64temp = (u64)ibmw_bind->bind_info.addr;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
-
- head++;
- if (head >= qsize)
- head = 0;
-
- nesqp->hwqp.sq_head = head;
- barrier();
-
- nes_write32(nesdev->regs+NES_WQE_ALLOC,
- (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- return 0;
-}
-
-
/*
* nes_alloc_fast_mr
*/
@@ -443,79 +370,46 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd,
} else {
kfree(nesmr);
nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
+ return ERR_PTR(-ENOMEM);
}
+
+ nesmr->pages = pci_alloc_consistent(nesdev->pcidev,
+ max_num_sg * sizeof(u64),
+ &nesmr->paddr);
+ if (!nesmr->paddr)
+ goto err;
+
+ nesmr->max_pages = max_num_sg;
+
return ibmr;
-}
-/*
- * nes_alloc_fast_reg_page_list
- */
-static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
- struct ib_device *ibdev,
- int page_list_len)
-{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct ib_fast_reg_page_list *pifrpl;
- struct nes_ib_fast_reg_page_list *pnesfrpl;
+err:
+ nes_dereg_mr(ibmr);
- if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
- return ERR_PTR(-E2BIG);
- /*
- * Allocate the ib_fast_reg_page_list structure, the
- * nes_fast_bpl structure, and the PLB table.
- */
- pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
- page_list_len * sizeof(u64), GFP_KERNEL);
+ return ERR_PTR(-ENOMEM);
+}
- if (!pnesfrpl)
- return ERR_PTR(-ENOMEM);
+static int nes_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct nes_mr *nesmr = to_nesmr(ibmr);
- pifrpl = &pnesfrpl->ibfrpl;
- pifrpl->page_list = &pnesfrpl->pbl;
- pifrpl->max_page_list_len = page_list_len;
- /*
- * Allocate the WQE PBL
- */
- pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
- page_list_len * sizeof(u64),
- &pnesfrpl->nes_wqe_pbl.paddr);
+ if (unlikely(nesmr->npages == nesmr->max_pages))
+ return -ENOMEM;
- if (!pnesfrpl->nes_wqe_pbl.kva) {
- kfree(pnesfrpl);
- return ERR_PTR(-ENOMEM);
- }
- nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
- "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
- "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl,
- pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
- (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr);
+ nesmr->pages[nesmr->npages++] = cpu_to_le64(addr);
- return pifrpl;
+ return 0;
}
-/*
- * nes_free_fast_reg_page_list
- */
-static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
+static int nes_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_ib_fast_reg_page_list *pnesfrpl;
+ struct nes_mr *nesmr = to_nesmr(ibmr);
- pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl);
- /*
- * Free the WQE PBL.
- */
- pci_free_consistent(nesdev->pcidev,
- pifrpl->max_page_list_len * sizeof(u64),
- pnesfrpl->nes_wqe_pbl.kva,
- pnesfrpl->nes_wqe_pbl.paddr);
- /*
- * Free the PBL structure
- */
- kfree(pnesfrpl);
+ nesmr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page);
}
/**
@@ -2106,9 +2000,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
/**
* nes_reg_phys_mr
*/
-static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
- struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
- u64 * iova_start)
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+ int acc, u64 *iova_start)
{
u64 region_length;
struct nes_pd *nespd = to_nespd(ib_pd);
@@ -2120,13 +2013,10 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
struct nes_vpbl vpbl;
struct nes_root_vpbl root_vpbl;
u32 stag;
- u32 i;
unsigned long mask;
u32 stag_index = 0;
u32 next_stag_index = 0;
u32 driver_key = 0;
- u32 root_pbl_index = 0;
- u32 cur_pbl_index = 0;
int err = 0;
int ret = 0;
u16 pbl_count = 0;
@@ -2145,11 +2035,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
next_stag_index >>= 8;
next_stag_index %= nesadapter->max_mr;
- if (num_phys_buf > (1024*512)) {
- return ERR_PTR(-E2BIG);
- }
- if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK)
+ if ((addr ^ *iova_start) & ~PAGE_MASK)
return ERR_PTR(-EINVAL);
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
@@ -2164,84 +2051,33 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-ENOMEM);
}
- for (i = 0; i < num_phys_buf; i++) {
+ /* Allocate a 4K buffer for the PBL */
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+ vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_phys_err;
+ }
- if ((i & 0x01FF) == 0) {
- if (root_pbl_index == 1) {
- /* Allocate the root PBL */
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
- &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- return ERR_PTR(-ENOMEM);
- }
- root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- return ERR_PTR(-ENOMEM);
- }
- root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- /* Allocate a 4K buffer for the PBL */
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
- vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
- kfree(nesmr);
- goto reg_phys_err;
- }
- /* Fill in the root table */
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
- }
- mask = !buffer_list[i].size;
- if (i != 0)
- mask |= buffer_list[i].addr;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
-
- if (mask & ~PAGE_MASK) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_phys_err;
- }
+ mask = !size;
- region_length += buffer_list[i].size;
- if ((i != 0) && (single_page)) {
- if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
- single_page = 0;
- }
- vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK);
- vpbl.pbl_vbase[cur_pbl_index++].pa_high =
- cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+ if (mask & ~PAGE_MASK) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_phys_err;
}
+ region_length += size;
+ vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
+ vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
+
stag = stag_index << 8;
stag |= driver_key;
stag += (u32)stag_key;
@@ -2251,17 +2087,15 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
/* Make the leaf PBL the root if only one PBL */
- if (root_pbl_index == 1) {
- root_vpbl.pbl_pbase = vpbl.pbl_pbase;
- }
+ root_vpbl.pbl_pbase = vpbl.pbl_pbase;
if (single_page) {
pbl_count = 0;
} else {
- pbl_count = root_pbl_index;
+ pbl_count = 1;
}
ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
- buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
+ addr, pbl_count, 1, acc, iova_start,
&nesmr->pbls_used, &nesmr->pbl_4k);
if (ret == 0) {
@@ -2274,21 +2108,9 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
ibmr = ERR_PTR(-ENOMEM);
}
- reg_phys_err:
- /* free the resources */
- if (root_pbl_index == 1) {
- /* single PBL case */
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
- } else {
- for (i=0; i<root_pbl_index; i++) {
- pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
- root_vpbl.leaf_vpbl[i].pbl_pbase);
- }
- kfree(root_vpbl.leaf_vpbl);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- }
-
+reg_phys_err:
+ /* single PBL case */
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
return ibmr;
}
@@ -2298,17 +2120,13 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
*/
static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
{
- struct ib_phys_buf bl;
u64 kva = 0;
nes_debug(NES_DBG_MR, "\n");
- bl.size = (u64)0xffffffffffULL;
- bl.addr = 0;
- return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
+ return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
}
-
/**
* nes_reg_user_mr
*/
@@ -2683,6 +2501,13 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
u16 major_code;
u16 minor_code;
+
+ if (nesmr->pages)
+ pci_free_consistent(nesdev->pcidev,
+ nesmr->max_pages * sizeof(u64),
+ nesmr->pages,
+ nesmr->paddr);
+
if (nesmr->region) {
ib_umem_release(nesmr->region);
}
@@ -3372,9 +3197,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- ib_wr->wr.rdma.rkey);
+ rdma_wr(ib_wr)->rkey);
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- ib_wr->wr.rdma.remote_addr);
+ rdma_wr(ib_wr)->remote_addr);
if ((ib_wr->send_flags & IB_SEND_INLINE) &&
((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
@@ -3409,9 +3234,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
}
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- ib_wr->wr.rdma.remote_addr);
+ rdma_wr(ib_wr)->remote_addr);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- ib_wr->wr.rdma.rkey);
+ rdma_wr(ib_wr)->rkey);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
ib_wr->sg_list->length);
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
@@ -3425,19 +3250,13 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
ib_wr->ex.invalidate_rkey);
break;
- case IB_WR_FAST_REG_MR:
+ case IB_WR_REG_MR:
{
- int i;
- int flags = ib_wr->wr.fast_reg.access_flags;
- struct nes_ib_fast_reg_page_list *pnesfrpl =
- container_of(ib_wr->wr.fast_reg.page_list,
- struct nes_ib_fast_reg_page_list,
- ibfrpl);
- u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
- u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
-
- if (ib_wr->wr.fast_reg.page_list_len >
- (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
+ struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr);
+ int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
+ int flags = reg_wr(ib_wr)->access;
+
+ if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
err = -EINVAL;
break;
@@ -3445,19 +3264,19 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
set_wqe_64bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
- ib_wr->wr.fast_reg.iova_start);
+ mr->ibmr.iova);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
- ib_wr->wr.fast_reg.length);
+ mr->ibmr.length);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
- ib_wr->wr.fast_reg.rkey);
- /* Set page size: */
- if (ib_wr->wr.fast_reg.page_shift == 12) {
+ reg_wr(ib_wr)->key);
+
+ if (page_shift == 12) {
wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
- } else if (ib_wr->wr.fast_reg.page_shift == 21) {
+ } else if (page_shift == 21) {
wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
} else {
nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
@@ -3465,6 +3284,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
err = -EINVAL;
break;
}
+
/* Set access_flags */
wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
if (flags & IB_ACCESS_LOCAL_WRITE)
@@ -3480,35 +3300,22 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
/* Fill in PBL info: */
- if (ib_wr->wr.fast_reg.page_list_len >
- pnesfrpl->ibfrpl.max_page_list_len) {
- nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
- " ib_wr=%p, value=%u, max=%u\n",
- ib_wr, ib_wr->wr.fast_reg.page_list_len,
- pnesfrpl->ibfrpl.max_page_list_len);
- err = -EINVAL;
- break;
- }
-
set_wqe_64bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
- pnesfrpl->nes_wqe_pbl.paddr);
+ mr->paddr);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
- ib_wr->wr.fast_reg.page_list_len * 8);
-
- for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
- dst_page_list[i] = cpu_to_le64(src_page_list[i]);
+ mr->npages * 8);
- nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, "
+ nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
"length: %d, rkey: %0x, pgl_paddr: %llx, "
"page_list_len: %u, wqe_misc: %x\n",
- (unsigned long long) ib_wr->wr.fast_reg.iova_start,
- ib_wr->wr.fast_reg.length,
- ib_wr->wr.fast_reg.rkey,
- (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr,
- ib_wr->wr.fast_reg.page_list_len,
+ (unsigned long long) mr->ibmr.iova,
+ mr->ibmr.length,
+ reg_wr(ib_wr)->key,
+ (unsigned long long) mr->paddr,
+ mr->npages,
wqe_misc);
break;
}
@@ -3751,7 +3558,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
entry->opcode = IB_WC_LOCAL_INV;
break;
case NES_IWARP_SQ_OP_FAST_REG:
- entry->opcode = IB_WC_FAST_REG_MR;
+ entry->opcode = IB_WC_REG_MR;
break;
}
@@ -3931,16 +3738,13 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.destroy_cq = nes_destroy_cq;
nesibdev->ibdev.poll_cq = nes_poll_cq;
nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
- nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
nesibdev->ibdev.dereg_mr = nes_dereg_mr;
nesibdev->ibdev.alloc_mw = nes_alloc_mw;
nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
- nesibdev->ibdev.bind_mw = nes_bind_mw;
nesibdev->ibdev.alloc_mr = nes_alloc_mr;
- nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
- nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
+ nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
nesibdev->ibdev.attach_mcast = nes_multicast_attach;
nesibdev->ibdev.detach_mcast = nes_multicast_detach;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 309b31c..7029088 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -79,6 +79,10 @@ struct nes_mr {
u16 pbls_used;
u8 mode;
u8 pbl_4k;
+ __le64 *pages;
+ dma_addr_t paddr;
+ u32 max_pages;
+ u32 npages;
};
struct nes_hw_pb {
@@ -186,4 +190,8 @@ struct nes_qp {
u8 pau_state;
__u64 nesuqp_addr;
};
+
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+ u64 addr, u64 size, int acc, u64 *iova_start);
+
#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index b4091ab..12503f1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -55,7 +55,7 @@
#include <be_roce.h>
#include "ocrdma_sli.h"
-#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0"
+#define OCRDMA_ROCE_DRV_VERSION "11.0.0.0"
#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -193,6 +193,8 @@ struct ocrdma_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct ocrdma_hw_mr hwmr;
+ u64 *pages;
+ u32 npages;
};
struct ocrdma_stats {
@@ -230,6 +232,10 @@ struct phy_info {
u16 interface_type;
};
+enum ocrdma_flags {
+ OCRDMA_FLAGS_LINK_STATUS_INIT = 0x01
+};
+
struct ocrdma_dev {
struct ib_device ibdev;
struct ocrdma_dev_attr attr;
@@ -278,7 +284,6 @@ struct ocrdma_dev {
u32 hba_port_num;
struct list_head entry;
- struct rcu_head rcu;
int id;
u64 *stag_arr;
u8 sl; /* service level */
@@ -286,6 +291,7 @@ struct ocrdma_dev {
atomic_t update_sl;
u16 pvid;
u32 asic_id;
+ u32 flags;
ulong last_stats_time;
struct mutex stats_lock; /* provide synch for debugfs operations */
@@ -317,9 +323,6 @@ struct ocrdma_cq {
*/
u32 max_hw_cqe;
bool phase_change;
- bool deferred_arm, deferred_sol;
- bool first_arm;
-
spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
* to cq polling
*/
@@ -590,4 +593,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state)
(state & OCRDMA_STATE_FLAG_SYNC);
}
+static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
+{
+ return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT);
+}
+
#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 44766fe..3790771 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -45,6 +45,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_cache.h>
#include "ocrdma.h"
#include "ocrdma_verbs.h"
@@ -56,10 +57,9 @@
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah_attr *attr, union ib_gid *sgid,
- int pdid, bool *isvlan)
+ int pdid, bool *isvlan, u16 vlan_tag)
{
int status = 0;
- u16 vlan_tag;
struct ocrdma_eth_vlan eth;
struct ocrdma_grh grh;
int eth_sz;
@@ -68,7 +68,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
memset(&grh, 0, sizeof(grh));
/* VLAN */
- vlan_tag = attr->vlan_id;
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
if (vlan_tag || dev->pfc_state) {
@@ -115,9 +114,11 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
{
u32 *ahid_addr;
- bool isvlan = false;
int status;
struct ocrdma_ah *ah;
+ bool isvlan = false;
+ u16 vlan_tag = 0xffff;
+ struct ib_gid_attr sgid_attr;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
union ib_gid sgid;
@@ -135,18 +136,26 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if (status)
goto av_err;
- status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid);
+ status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid,
+ &sgid_attr);
if (status) {
pr_err("%s(): Failed to query sgid, status = %d\n",
__func__, status);
goto av_conf_err;
}
+ if (sgid_attr.ndev) {
+ if (is_vlan_dev(sgid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
+ }
if ((pd->uctx) &&
(!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
(!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
- status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
- attr->dmac, &attr->vlan_id);
+ status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
+ attr->dmac, &vlan_tag,
+ &sgid_attr.ndev->ifindex,
+ NULL);
if (status) {
pr_err("%s(): Failed to resolve dmac from gid."
"status = %d\n", __func__, status);
@@ -154,7 +163,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
}
}
- status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan);
+ status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag);
if (status)
goto av_conf_err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index aab391a..283ca84 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -47,6 +47,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_cache.h>
#include "ocrdma.h"
#include "ocrdma_hw.h"
@@ -578,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
+ /* Request link events on this MQ. */
+ cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE);
cmd->async_cqid_ringsize = cq->id;
cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -678,11 +681,33 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
int dev_event = 0;
int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+ u16 qpid = cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK;
+ u16 cqid = cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK;
- if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID)
- qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK];
- if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
- cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
+ /*
+ * Some FW version returns wrong qp or cq ids in CQEs.
+ * Checking whether the IDs are valid
+ */
+
+ if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) {
+ if (qpid < dev->attr.max_qp)
+ qp = dev->qp_tbl[qpid];
+ if (qp == NULL) {
+ pr_err("ocrdma%d:Async event - qpid %u is not valid\n",
+ dev->id, qpid);
+ return;
+ }
+ }
+
+ if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) {
+ if (cqid < dev->attr.max_cq)
+ cq = dev->cq_tbl[cqid];
+ if (cq == NULL) {
+ pr_err("ocrdma%d:Async event - cqid %u is not valid\n",
+ dev->id, cqid);
+ return;
+ }
+ }
memset(&ib_evt, 0, sizeof(ib_evt));
@@ -796,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
}
}
+static void ocrdma_process_link_state(struct ocrdma_dev *dev,
+ struct ocrdma_ae_mcqe *cqe)
+{
+ struct ocrdma_ae_lnkst_mcqe *evt;
+ u8 lstate;
+
+ evt = (struct ocrdma_ae_lnkst_mcqe *)cqe;
+ lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn);
+
+ if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK))
+ return;
+
+ if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)
+ ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK));
+}
+
static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
{
/* async CQE processing */
struct ocrdma_ae_mcqe *cqe = ae_cqe;
u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
-
- if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
+ switch (evt_code) {
+ case OCRDMA_ASYNC_LINK_EVE_CODE:
+ ocrdma_process_link_state(dev, cqe);
+ break;
+ case OCRDMA_ASYNC_RDMA_EVE_CODE:
ocrdma_dispatch_ibevent(dev, cqe);
- else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
+ break;
+ case OCRDMA_ASYNC_GRP5_EVE_CODE:
ocrdma_process_grp5_aync(dev, cqe);
- else
+ break;
+ default:
pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
dev->id, evt_code);
+ }
}
static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
@@ -1340,7 +1387,8 @@ mbx_err:
return status;
}
-int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
+ u8 *lnk_state)
{
int status = -ENOMEM;
struct ocrdma_get_link_speed_rsp *rsp;
@@ -1361,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
goto mbx_err;
rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
- *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
- >> OCRDMA_PHY_PS_SHIFT;
+ if (lnk_speed)
+ *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
+ >> OCRDMA_PHY_PS_SHIFT;
+ if (lnk_state)
+ *lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK);
mbx_err:
kfree(cmd);
@@ -2448,6 +2499,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
int status;
struct ib_ah_attr *ah_attr = &attrs->ah_attr;
union ib_gid sgid, zgid;
+ struct ib_gid_attr sgid_attr;
u32 vlan_id = 0xFFFF;
u8 mac_addr[6];
struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
@@ -2466,10 +2518,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
sizeof(cmd->params.dgid));
- status = ocrdma_query_gid(&dev->ibdev, 1,
- ah_attr->grh.sgid_index, &sgid);
- if (status)
- return status;
+
+ status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index,
+ &sgid, &sgid_attr);
+ if (!status && sgid_attr.ndev) {
+ vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+ memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
+ dev_put(sgid_attr.ndev);
+ }
memset(&zgid, 0, sizeof(zgid));
if (!memcmp(&sgid, &zgid, sizeof(zgid)))
@@ -2486,17 +2542,16 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
- if (attr_mask & IB_QP_VID) {
- vlan_id = attrs->vlan_id;
- } else if (dev->pfc_state) {
- vlan_id = 0;
- pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
- dev->id);
- pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
- dev->id);
- }
- if (vlan_id < 0x1000) {
+ if (vlan_id == 0xFFFF)
+ vlan_id = 0;
+ if (vlan_id || dev->pfc_state) {
+ if (!vlan_id) {
+ pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
+ dev->id);
+ pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
+ dev->id);
+ }
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index 7ed885c..ebc1f44 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -106,7 +106,8 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
bool solicited, u16 cqe_popped);
/* verbs specific mailbox commands */
-int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
+ u8 *lnk_st);
int ocrdma_query_config(struct ocrdma_dev *,
struct ocrdma_mbx_query_config *config);
@@ -153,5 +154,6 @@ char *port_speed_string(struct ocrdma_dev *dev);
void ocrdma_init_service_level(struct ocrdma_dev *);
void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev);
void ocrdma_free_pd_range(struct ocrdma_dev *dev);
+void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate);
#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 87aa55d..f387430 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -63,8 +63,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
MODULE_AUTHOR("Emulex Corporation");
MODULE_LICENSE("Dual BSD/GPL");
-static LIST_HEAD(ocrdma_dev_list);
-static DEFINE_SPINLOCK(ocrdma_devlist_lock);
static DEFINE_IDR(ocrdma_dev_id);
void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
@@ -177,13 +175,11 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.req_notify_cq = ocrdma_arm_cq;
dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
- dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
dev->ibdev.dereg_mr = ocrdma_dereg_mr;
dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
dev->ibdev.alloc_mr = ocrdma_alloc_mr;
- dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
- dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
+ dev->ibdev.map_mr_sg = ocrdma_map_mr_sg;
/* mandatory to support user space verbs consumer. */
dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
@@ -232,6 +228,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
ocrdma_alloc_pd_pool(dev);
+ if (!ocrdma_alloc_stats_resources(dev)) {
+ pr_err("%s: stats resource allocation failed\n", __func__);
+ goto alloc_err;
+ }
+
spin_lock_init(&dev->av_tbl.lock);
spin_lock_init(&dev->flush_q_lock);
return 0;
@@ -242,6 +243,7 @@ alloc_err:
static void ocrdma_free_resources(struct ocrdma_dev *dev)
{
+ ocrdma_release_stats_resources(dev);
kfree(dev->stag_arr);
kfree(dev->qp_tbl);
kfree(dev->cq_tbl);
@@ -293,6 +295,7 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
int status = 0, i;
+ u8 lstate = 0;
struct ocrdma_dev *dev;
dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -322,12 +325,14 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (status)
goto alloc_err;
+ /* Query Link state and update */
+ status = ocrdma_mbx_get_link_speed(dev, NULL, &lstate);
+ if (!status)
+ ocrdma_update_link_state(dev, lstate);
+
for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
goto sysfs_err;
- spin_lock(&ocrdma_devlist_lock);
- list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
- spin_unlock(&ocrdma_devlist_lock);
/* Init stats */
ocrdma_add_port_stats(dev);
/* Interrupt Moderation */
@@ -356,9 +361,8 @@ idr_err:
return NULL;
}
-static void ocrdma_remove_free(struct rcu_head *rcu)
+static void ocrdma_remove_free(struct ocrdma_dev *dev)
{
- struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
idr_remove(&ocrdma_dev_id, dev->id);
kfree(dev->mbx_cmd);
@@ -375,18 +379,12 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
ib_unregister_device(&dev->ibdev);
ocrdma_rem_port_stats(dev);
-
- spin_lock(&ocrdma_devlist_lock);
- list_del_rcu(&dev->entry);
- spin_unlock(&ocrdma_devlist_lock);
-
ocrdma_free_resources(dev);
ocrdma_cleanup_hw(dev);
-
- call_rcu(&dev->rcu, ocrdma_remove_free);
+ ocrdma_remove_free(dev);
}
-static int ocrdma_open(struct ocrdma_dev *dev)
+static int ocrdma_dispatch_port_active(struct ocrdma_dev *dev)
{
struct ib_event port_event;
@@ -397,32 +395,9 @@ static int ocrdma_open(struct ocrdma_dev *dev)
return 0;
}
-static int ocrdma_close(struct ocrdma_dev *dev)
+static int ocrdma_dispatch_port_error(struct ocrdma_dev *dev)
{
- int i;
- struct ocrdma_qp *qp, **cur_qp;
struct ib_event err_event;
- struct ib_qp_attr attrs;
- int attr_mask = IB_QP_STATE;
-
- attrs.qp_state = IB_QPS_ERR;
- mutex_lock(&dev->dev_lock);
- if (dev->qp_tbl) {
- cur_qp = dev->qp_tbl;
- for (i = 0; i < OCRDMA_MAX_QP; i++) {
- qp = cur_qp[i];
- if (qp && qp->ibqp.qp_type != IB_QPT_GSI) {
- /* change the QP state to ERROR */
- _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);
-
- err_event.event = IB_EVENT_QP_FATAL;
- err_event.element.qp = &qp->ibqp;
- err_event.device = &dev->ibdev;
- ib_dispatch_event(&err_event);
- }
- }
- }
- mutex_unlock(&dev->dev_lock);
err_event.event = IB_EVENT_PORT_ERR;
err_event.element.port_num = 1;
@@ -433,7 +408,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)
static void ocrdma_shutdown(struct ocrdma_dev *dev)
{
- ocrdma_close(dev);
+ ocrdma_dispatch_port_error(dev);
ocrdma_remove(dev);
}
@@ -444,16 +419,26 @@ static void ocrdma_shutdown(struct ocrdma_dev *dev)
static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
{
switch (event) {
- case BE_DEV_UP:
- ocrdma_open(dev);
- break;
- case BE_DEV_DOWN:
- ocrdma_close(dev);
- break;
case BE_DEV_SHUTDOWN:
ocrdma_shutdown(dev);
break;
+ default:
+ break;
+ }
+}
+
+void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate)
+{
+ if (!(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)) {
+ dev->flags |= OCRDMA_FLAGS_LINK_STATUS_INIT;
+ if (!lstate)
+ return;
}
+
+ if (!lstate)
+ ocrdma_dispatch_port_error(dev);
+ else
+ ocrdma_dispatch_port_active(dev);
}
static struct ocrdma_driver ocrdma_drv = {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 6a38268..99dd6fd 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -465,8 +465,11 @@ struct ocrdma_ae_qp_mcqe {
u32 valid_ae_event;
};
-#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
-#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
+enum ocrdma_async_event_code {
+ OCRDMA_ASYNC_LINK_EVE_CODE = 0x01,
+ OCRDMA_ASYNC_GRP5_EVE_CODE = 0x05,
+ OCRDMA_ASYNC_RDMA_EVE_CODE = 0x14
+};
enum ocrdma_async_grp5_events {
OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01,
@@ -489,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
OCRDMA_MAX_ASYNC_ERRORS
};
+struct ocrdma_ae_lnkst_mcqe {
+ u32 speed_state_ptn;
+ u32 qos_reason_falut;
+ u32 evt_tag;
+ u32 valid_ae_event;
+};
+
+enum {
+ OCRDMA_AE_LSC_PORT_NUM_MASK = 0x3F,
+ OCRDMA_AE_LSC_PT_SHIFT = 0x06,
+ OCRDMA_AE_LSC_PT_MASK = (0x03 <<
+ OCRDMA_AE_LSC_PT_SHIFT),
+ OCRDMA_AE_LSC_LS_SHIFT = 0x08,
+ OCRDMA_AE_LSC_LS_MASK = (0xFF <<
+ OCRDMA_AE_LSC_LS_SHIFT),
+ OCRDMA_AE_LSC_LD_SHIFT = 0x10,
+ OCRDMA_AE_LSC_LD_MASK = (0xFF <<
+ OCRDMA_AE_LSC_LD_SHIFT),
+ OCRDMA_AE_LSC_PPS_SHIFT = 0x18,
+ OCRDMA_AE_LSC_PPS_MASK = (0xFF <<
+ OCRDMA_AE_LSC_PPS_SHIFT),
+ OCRDMA_AE_LSC_PPF_MASK = 0xFF,
+ OCRDMA_AE_LSC_ER_SHIFT = 0x08,
+ OCRDMA_AE_LSC_ER_MASK = (0xFF <<
+ OCRDMA_AE_LSC_ER_SHIFT),
+ OCRDMA_AE_LSC_QOS_SHIFT = 0x10,
+ OCRDMA_AE_LSC_QOS_MASK = (0xFFFF <<
+ OCRDMA_AE_LSC_QOS_SHIFT)
+};
+
+enum {
+ OCRDMA_AE_LSC_PLINK_DOWN = 0x00,
+ OCRDMA_AE_LSC_PLINK_UP = 0x01,
+ OCRDMA_AE_LSC_LLINK_DOWN = 0x02,
+ OCRDMA_AE_LSC_LLINK_MASK = 0x02,
+ OCRDMA_AE_LSC_LLINK_UP = 0x03
+};
+
/* mailbox command request and responses */
enum {
OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2,
@@ -676,7 +717,7 @@ enum {
OCRDMA_PHY_PFLT_SHIFT = 0x18,
OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000,
OCRDMA_QOS_LNKSP_SHIFT = 0x10,
- OCRDMA_LLST_MASK = 0xFF,
+ OCRDMA_LINK_ST_MASK = 0x01,
OCRDMA_PLFC_MASK = 0x00000400,
OCRDMA_PLFC_SHIFT = 0x8,
OCRDMA_PLRFC_MASK = 0x00000200,
@@ -691,7 +732,7 @@ struct ocrdma_get_link_speed_rsp {
u32 pflt_pps_ld_pnum;
u32 qos_lsp;
- u32 res_lls;
+ u32 res_lnk_st;
};
enum {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 69334e2..255f774 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -64,10 +64,11 @@ static int ocrdma_add_stat(char *start, char *pcur,
return cpy_len;
}
-static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
+bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev)
{
struct stats_mem *mem = &dev->stats_mem;
+ mutex_init(&dev->stats_lock);
/* Alloc mbox command mem*/
mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
sizeof(struct ocrdma_rdma_stats_resp));
@@ -91,13 +92,14 @@ static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
return true;
}
-static void ocrdma_release_stats_mem(struct ocrdma_dev *dev)
+void ocrdma_release_stats_resources(struct ocrdma_dev *dev)
{
struct stats_mem *mem = &dev->stats_mem;
if (mem->va)
dma_free_coherent(&dev->nic_info.pdev->dev, mem->size,
mem->va, mem->pa);
+ mem->va = NULL;
kfree(mem->debugfs_mem);
}
@@ -838,15 +840,9 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev)
&dev->reset_stats, &ocrdma_dbg_ops))
goto err;
- /* Now create dma_mem for stats mbx command */
- if (!ocrdma_alloc_stats_mem(dev))
- goto err;
-
- mutex_init(&dev->stats_lock);
return;
err:
- ocrdma_release_stats_mem(dev);
debugfs_remove_recursive(dev->dir);
dev->dir = NULL;
}
@@ -855,9 +851,7 @@ void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
{
if (!dev->dir)
return;
- mutex_destroy(&dev->stats_lock);
- ocrdma_release_stats_mem(dev);
- debugfs_remove(dev->dir);
+ debugfs_remove_recursive(dev->dir);
}
void ocrdma_init_debugfs(void)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
index c9e58d0..bba1fec 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
@@ -65,6 +65,8 @@ enum OCRDMA_STATS_TYPE {
void ocrdma_rem_debugfs(void);
void ocrdma_init_debugfs(void);
+bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev);
+void ocrdma_release_stats_resources(struct ocrdma_dev *dev);
void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
void ocrdma_add_port_stats(struct ocrdma_dev *dev);
int ocrdma_pma_counters(struct ocrdma_dev *dev,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 1f3affb..12420e4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -73,7 +73,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
if (index >= OCRDMA_MAX_SGID)
return -EINVAL;
- ret = ib_get_cached_gid(ibdev, port, index, sgid);
+ ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
if (ret == -EAGAIN) {
memcpy(sgid, &zgid, sizeof(*sgid));
return 0;
@@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_MGT_EXTENSIONS;
- attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
- attr->max_sge_rd = 0;
+ attr->max_sge = dev->attr.max_send_sge;
+ attr->max_sge_rd = attr->max_sge;
attr->max_cq = dev->attr.max_cq;
attr->max_cqe = dev->attr.max_cqe;
attr->max_mr = dev->attr.max_mr;
@@ -171,7 +171,7 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
int status;
u8 speed;
- status = ocrdma_mbx_get_link_speed(dev, &speed);
+ status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
if (status)
speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
@@ -1013,6 +1013,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
(void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
+ kfree(mr->pages);
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
/* it could be user registered memory. */
@@ -1093,7 +1094,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
spin_lock_init(&cq->comp_handler_lock);
INIT_LIST_HEAD(&cq->sq_head);
INIT_LIST_HEAD(&cq->rq_head);
- cq->first_arm = true;
if (ib_ctx) {
uctx = get_ocrdma_ucontext(ib_ctx);
@@ -1997,13 +1997,13 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
{
struct ocrdma_ewqe_ud_hdr *ud_hdr =
(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
- struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
+ struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
- ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
+ ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
if (qp->qp_type == IB_QPT_GSI)
ud_hdr->qkey = qp->qkey;
else
- ud_hdr->qkey = wr->wr.ud.remote_qkey;
+ ud_hdr->qkey = ud_wr(wr)->remote_qkey;
ud_hdr->rsvd_ahid = ah->id;
if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
@@ -2106,9 +2106,9 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
if (status)
return status;
- ext_rw->addr_lo = wr->wr.rdma.remote_addr;
- ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
- ext_rw->lrkey = wr->wr.rdma.rkey;
+ ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
+ ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
+ ext_rw->lrkey = rdma_wr(wr)->rkey;
ext_rw->len = hdr->total_len;
return 0;
}
@@ -2126,46 +2126,12 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
- ext_rw->addr_lo = wr->wr.rdma.remote_addr;
- ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
- ext_rw->lrkey = wr->wr.rdma.rkey;
+ ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
+ ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
+ ext_rw->lrkey = rdma_wr(wr)->rkey;
ext_rw->len = hdr->total_len;
}
-static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
- struct ocrdma_hw_mr *hwmr)
-{
- int i;
- u64 buf_addr = 0;
- int num_pbes;
- struct ocrdma_pbe *pbe;
-
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
-
- /* go through the OS phy regions & fill hw pbe entries into pbls. */
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- /* number of pbes can be more for one OS buf, when
- * buffers are of different sizes.
- * split the ib_buf to one or more pbes.
- */
- buf_addr = wr->wr.fast_reg.page_list->page_list[i];
- pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
- pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
- num_pbes += 1;
- pbe++;
-
- /* if the pbl is full storing the pbes,
- * move to next pbl.
- */
- if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- }
- }
- return;
-}
-
static int get_encoded_page_size(int pg_sz)
{
/* Max size is 256M 4096 << 16 */
@@ -2176,48 +2142,59 @@ static int get_encoded_page_size(int pg_sz)
return i;
}
-
-static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
- struct ib_send_wr *wr)
+static int ocrdma_build_reg(struct ocrdma_qp *qp,
+ struct ocrdma_hdr_wqe *hdr,
+ struct ib_reg_wr *wr)
{
u64 fbo;
struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
- struct ocrdma_mr *mr;
- struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
+ struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
+ struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
+ struct ocrdma_pbe *pbe;
u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
+ int num_pbes = 0, i;
wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
- if (wr->wr.fast_reg.page_list_len > dev->attr.max_pages_per_frmr)
- return -EINVAL;
-
hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
- if (wr->wr.fast_reg.page_list_len == 0)
- BUG();
- if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
+ if (wr->access & IB_ACCESS_LOCAL_WRITE)
hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
- if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)
+ if (wr->access & IB_ACCESS_REMOTE_WRITE)
hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
- if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)
+ if (wr->access & IB_ACCESS_REMOTE_READ)
hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
- hdr->lkey = wr->wr.fast_reg.rkey;
- hdr->total_len = wr->wr.fast_reg.length;
+ hdr->lkey = wr->key;
+ hdr->total_len = mr->ibmr.length;
- fbo = wr->wr.fast_reg.iova_start -
- (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);
+ fbo = mr->ibmr.iova - mr->pages[0];
- fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start);
- fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff);
+ fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
+ fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
fast_reg->fbo_hi = upper_32_bits(fbo);
fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
- fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
- fast_reg->size_sge =
- get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
- mr = (struct ocrdma_mr *) (unsigned long)
- dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
- build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
+ fast_reg->num_sges = mr->npages;
+ fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
+
+ pbe = pbl_tbl->va;
+ for (i = 0; i < mr->npages; i++) {
+ u64 buf_addr = mr->pages[i];
+
+ pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+ pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
+ num_pbes += 1;
+ pbe++;
+
+ /* if the pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ }
+ }
+
return 0;
}
@@ -2300,8 +2277,8 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
hdr->lkey = wr->ex.invalidate_rkey;
break;
- case IB_WR_FAST_REG_MR:
- status = ocrdma_build_fr(qp, hdr, wr);
+ case IB_WR_REG_MR:
+ status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
break;
default:
status = -EINVAL;
@@ -2567,7 +2544,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
ibwc->opcode = IB_WC_SEND;
break;
case OCRDMA_FR_MR:
- ibwc->opcode = IB_WC_FAST_REG_MR;
+ ibwc->opcode = IB_WC_REG_MR;
break;
case OCRDMA_LKEY_INV:
ibwc->opcode = IB_WC_LOCAL_INV;
@@ -2748,8 +2725,7 @@ static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
OCRDMA_CQE_SRCQP_MASK;
- ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
- OCRDMA_CQE_PKEY_MASK;
+ ibwc->pkey_index = 0;
ibwc->wc_flags = IB_WC_GRH;
ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
OCRDMA_CQE_UD_XFER_LEN_SHIFT);
@@ -2933,17 +2909,9 @@ expand_cqe:
}
stop_cqe:
cq->getp = cur_getp;
- if (cq->deferred_arm) {
- ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
- polled_hw_cqes);
- cq->deferred_arm = false;
- cq->deferred_sol = false;
- } else {
- /* We need to pop the CQE. No need to arm */
- ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
- polled_hw_cqes);
- cq->deferred_sol = false;
- }
+
+ if (polled_hw_cqes)
+ ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
return i;
}
@@ -3027,13 +2995,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
if (cq_flags & IB_CQ_SOLICITED)
sol_needed = true;
- if (cq->first_arm) {
- ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
- cq->first_arm = false;
- }
-
- cq->deferred_arm = true;
- cq->deferred_sol = sol_needed;
+ ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
spin_unlock_irqrestore(&cq->cq_lock, flags);
return 0;
@@ -3058,6 +3020,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mr->pages) {
+ status = -ENOMEM;
+ goto pl_err;
+ }
+
status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
if (status)
goto pbl_err;
@@ -3081,189 +3049,31 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
mbx_err:
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
pbl_err:
+ kfree(mr->pages);
+pl_err:
kfree(mr);
return ERR_PTR(-ENOMEM);
}
-struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
- *ibdev,
- int page_list_len)
-{
- struct ib_fast_reg_page_list *frmr_list;
- int size;
-
- size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
- frmr_list = kzalloc(size, GFP_KERNEL);
- if (!frmr_list)
- return ERR_PTR(-ENOMEM);
- frmr_list->page_list = (u64 *)(frmr_list + 1);
- return frmr_list;
-}
-
-void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
-{
- kfree(page_list);
-}
-
-#define MAX_KERNEL_PBE_SIZE 65536
-static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
- int buf_cnt, u32 *pbe_size)
+static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
{
- u64 total_size = 0;
- u64 buf_size = 0;
- int i;
- *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
- *pbe_size = roundup_pow_of_two(*pbe_size);
-
- /* find the smallest PBE size that we can have */
- for (i = 0; i < buf_cnt; i++) {
- /* first addr may not be page aligned, so ignore checking */
- if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
- (buf_list[i].size & ~PAGE_MASK))) {
- return 0;
- }
-
- /* if configured PBE size is greater then the chosen one,
- * reduce the PBE size.
- */
- buf_size = roundup(buf_list[i].size, PAGE_SIZE);
- /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
- buf_size = roundup_pow_of_two(buf_size);
- if (*pbe_size > buf_size)
- *pbe_size = buf_size;
-
- total_size += buf_size;
- }
- *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
- (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
+ struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
- /* num_pbes = total_size / (*pbe_size); this is implemented below. */
-
- return total_size >> ilog2(*pbe_size);
-}
-
-static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
- u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
- struct ocrdma_hw_mr *hwmr)
-{
- int i;
- int idx;
- int pbes_per_buf = 0;
- u64 buf_addr = 0;
- int num_pbes;
- struct ocrdma_pbe *pbe;
- int total_num_pbes = 0;
+ if (unlikely(mr->npages == mr->hwmr.num_pbes))
+ return -ENOMEM;
- if (!hwmr->num_pbes)
- return;
+ mr->pages[mr->npages++] = addr;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
-
- /* go through the OS phy regions & fill hw pbe entries into pbls. */
- for (i = 0; i < ib_buf_cnt; i++) {
- buf_addr = buf_list[i].addr;
- pbes_per_buf =
- roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
- pbe_size;
- hwmr->len += buf_list[i].size;
- /* number of pbes can be more for one OS buf, when
- * buffers are of different sizes.
- * split the ib_buf to one or more pbes.
- */
- for (idx = 0; idx < pbes_per_buf; idx++) {
- /* we program always page aligned addresses,
- * first unaligned address is taken care by fbo.
- */
- if (i == 0) {
- /* for non zero fbo, assign the
- * start of the page.
- */
- pbe->pa_lo =
- cpu_to_le32((u32) (buf_addr & PAGE_MASK));
- pbe->pa_hi =
- cpu_to_le32((u32) upper_32_bits(buf_addr));
- } else {
- pbe->pa_lo =
- cpu_to_le32((u32) (buf_addr & 0xffffffff));
- pbe->pa_hi =
- cpu_to_le32((u32) upper_32_bits(buf_addr));
- }
- buf_addr += pbe_size;
- num_pbes += 1;
- total_num_pbes += 1;
- pbe++;
-
- if (total_num_pbes == hwmr->num_pbes)
- goto mr_tbl_done;
- /* if the pbl is full storing the pbes,
- * move to next pbl.
- */
- if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
- }
- }
- }
-mr_tbl_done:
- return;
+ return 0;
}
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
- struct ib_phys_buf *buf_list,
- int buf_cnt, int acc, u64 *iova_start)
+int ocrdma_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- int status = -ENOMEM;
- struct ocrdma_mr *mr;
- struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
- u32 num_pbes;
- u32 pbe_size = 0;
+ struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
- if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
- return ERR_PTR(-EINVAL);
+ mr->npages = 0;
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(status);
-
- num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
- if (num_pbes == 0) {
- status = -EINVAL;
- goto pbl_err;
- }
- status = ocrdma_get_pbl_info(dev, mr, num_pbes);
- if (status)
- goto pbl_err;
-
- mr->hwmr.pbe_size = pbe_size;
- mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
- mr->hwmr.va = *iova_start;
- mr->hwmr.local_rd = 1;
- mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
- mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
- mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
- mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
- mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
-
- status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
- if (status)
- goto pbl_err;
- build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
- &mr->hwmr);
- status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
- if (status)
- goto mbx_err;
-
- mr->ibmr.lkey = mr->hwmr.lkey;
- if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
- mr->ibmr.rkey = mr->hwmr.lkey;
- return &mr->ibmr;
-
-mbx_err:
- ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
-pbl_err:
- kfree(mr);
- return ERR_PTR(status);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 308c168..8b517fd 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -117,17 +117,13 @@ int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
int ocrdma_dereg_mr(struct ib_mr *);
struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *);
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
- *ibdev,
- int page_list_len);
-void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list);
+int ocrdma_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
#endif /* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 13ef22b..fcdf3791 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
error = qibfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return error;
}
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
int ret, i;
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
snprintf(unit, sizeof(unit), "%u", dd->unit);
dir = lookup_one_len(unit, root, strlen(unit));
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
goto bail;
}
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
}
}
remove_file(dir, "flash");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = simple_rmdir(d_inode(root), dir);
d_delete(dir);
dput(dir);
bail:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
return ret;
}
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 7e00470..4ff340f 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1680,7 +1680,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
* heavy filesystem activity makes these fail, and we can
* use compound pages.
*/
- gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
+ gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
egrcnt = rcd->rcvegrcnt;
egroff = rcd->rcvegr_tid_base;
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 5afaa21..d725c56 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -336,14 +336,15 @@ bail:
}
/*
- * Initialize the memory region specified by the work reqeust.
+ * Initialize the memory region specified by the work request.
*/
-int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
+int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr)
{
struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct qib_pd *pd = to_ipd(qp->ibqp.pd);
- struct qib_mregion *mr;
- u32 rkey = wr->wr.fast_reg.rkey;
+ struct qib_mr *mr = to_imr(wr->mr);
+ struct qib_mregion *mrg;
+ u32 key = wr->key;
unsigned i, n, m;
int ret = -EINVAL;
unsigned long flags;
@@ -351,33 +352,33 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
size_t ps;
spin_lock_irqsave(&rkt->lock, flags);
- if (pd->user || rkey == 0)
+ if (pd->user || key == 0)
goto bail;
- mr = rcu_dereference_protected(
- rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))],
+ mrg = rcu_dereference_protected(
+ rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
lockdep_is_held(&rkt->lock));
- if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
+ if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
goto bail;
- if (wr->wr.fast_reg.page_list_len > mr->max_segs)
+ if (mr->npages > mrg->max_segs)
goto bail;
- ps = 1UL << wr->wr.fast_reg.page_shift;
- if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
+ ps = mr->ibmr.page_size;
+ if (mr->ibmr.length > ps * mr->npages)
goto bail;
- mr->user_base = wr->wr.fast_reg.iova_start;
- mr->iova = wr->wr.fast_reg.iova_start;
- mr->lkey = rkey;
- mr->length = wr->wr.fast_reg.length;
- mr->access_flags = wr->wr.fast_reg.access_flags;
- page_list = wr->wr.fast_reg.page_list->page_list;
+ mrg->user_base = mr->ibmr.iova;
+ mrg->iova = mr->ibmr.iova;
+ mrg->lkey = key;
+ mrg->length = mr->ibmr.length;
+ mrg->access_flags = wr->access;
+ page_list = mr->pages;
m = 0;
n = 0;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
- mr->map[m]->segs[n].vaddr = (void *) page_list[i];
- mr->map[m]->segs[n].length = ps;
+ for (i = 0; i < mr->npages; i++) {
+ mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
+ mrg->map[m]->segs[n].length = ps;
if (++n == QIB_SEGSZ) {
m++;
n = 0;
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 19220dc..5f53304 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -150,10 +150,7 @@ static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
rval = init_qib_mregion(&mr->mr, pd, count);
if (rval)
goto bail;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
+
rval = qib_alloc_lkey(&mr->mr, 0);
if (rval)
goto bail_mregion;
@@ -171,52 +168,6 @@ bail:
}
/**
- * qib_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct qib_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- goto bail;
- }
-
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.access_flags = acc;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == QIB_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
* qib_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
@@ -303,6 +254,7 @@ int qib_dereg_mr(struct ib_mr *ibmr)
int ret = 0;
unsigned long timeout;
+ kfree(mr->pages);
qib_free_lkey(&mr->mr);
qib_put_mr(&mr->mr); /* will set completion if last */
@@ -323,7 +275,7 @@ out:
/*
* Allocate a memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
+ * IB_WR_REG_MR send work request.
*
* Return the memory region on success, otherwise return an errno.
*/
@@ -340,37 +292,38 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
if (IS_ERR(mr))
return (struct ib_mr *)mr;
+ mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mr->pages)
+ goto err;
+
return &mr->ibmr;
+
+err:
+ qib_dereg_mr(&mr->ibmr);
+ return ERR_PTR(-ENOMEM);
}
-struct ib_fast_reg_page_list *
-qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
+static int qib_set_page(struct ib_mr *ibmr, u64 addr)
{
- unsigned size = page_list_len * sizeof(u64);
- struct ib_fast_reg_page_list *pl;
-
- if (size > PAGE_SIZE)
- return ERR_PTR(-EINVAL);
-
- pl = kzalloc(sizeof(*pl), GFP_KERNEL);
- if (!pl)
- return ERR_PTR(-ENOMEM);
+ struct qib_mr *mr = to_imr(ibmr);
- pl->page_list = kzalloc(size, GFP_KERNEL);
- if (!pl->page_list)
- goto err_free;
+ if (unlikely(mr->npages == mr->mr.max_segs))
+ return -ENOMEM;
- return pl;
+ mr->pages[mr->npages++] = addr;
-err_free:
- kfree(pl);
- return ERR_PTR(-ENOMEM);
+ return 0;
}
-void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl)
+int qib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents)
{
- kfree(pl->page_list);
- kfree(pl);
+ struct qib_mr *mr = to_imr(ibmr);
+
+ mr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page);
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 4fa88ba..3eff35c 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -100,9 +100,10 @@ static u32 credit_table[31] = {
32768 /* 1E */
};
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
+static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+ gfp_t gfp)
{
- unsigned long page = get_zeroed_page(GFP_KERNEL);
+ unsigned long page = get_zeroed_page(gfp);
/*
* Free the page if someone raced with us installing it.
@@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
* zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
*/
static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- enum ib_qp_type type, u8 port)
+ enum ib_qp_type type, u8 port, gfp_t gfp)
{
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
@@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- get_map_page(qpt, map);
+ get_map_page(qpt, map, gfp);
if (unlikely(!map->page))
break;
}
@@ -436,7 +437,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
}
@@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
size_t sz;
size_t sg_list_sz;
struct ib_qp *ret;
+ gfp_t gfp;
+
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
- init_attr->create_flags) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
+ init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+ return ERR_PTR(-EINVAL);
+
+ /* GFP_NOIO is applicable in RC QPs only */
+ if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+ init_attr->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+
+ gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+ GFP_NOIO : GFP_KERNEL;
/* Check receive queue parameters if no SRQ is specified. */
if (!init_attr->srq) {
@@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
sz = sizeof(struct qib_sge) *
init_attr->cap.max_send_sge +
sizeof(struct qib_swqe);
- swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+ swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
+ gfp, PAGE_KERNEL);
if (swq == NULL) {
ret = ERR_PTR(-ENOMEM);
goto bail;
@@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
} else if (init_attr->cap.max_recv_sge > 1)
sg_list_sz = sizeof(*qp->r_sg_list) *
(init_attr->cap.max_recv_sge - 1);
- qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+ qp = kzalloc(sz + sg_list_sz, gfp);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
goto bail_swq;
}
RCU_INIT_POINTER(qp->next, NULL);
- qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+ qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
if (!qp->s_hdr) {
ret = ERR_PTR(-ENOMEM);
goto bail_qp;
@@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
sizeof(struct qib_rwqe);
- qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
- qp->r_rq.size * sz);
+ if (gfp != GFP_NOIO)
+ qp->r_rq.wq = vmalloc_user(
+ sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz);
+ else
+ qp->r_rq.wq = __vmalloc(
+ sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz,
+ gfp, PAGE_KERNEL);
+
if (!qp->r_rq.wq) {
ret = ERR_PTR(-ENOMEM);
goto bail_qp;
@@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
dev = to_idev(ibpd->device);
dd = dd_from_dev(dev);
err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
- init_attr->port_num);
+ init_attr->port_num, gfp);
if (err < 0) {
ret = ERR_PTR(err);
vfree(qp->r_rq.wq);
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
index 5e27f76..4c7c3c8 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.c
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -292,7 +292,7 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
qib_dev_porterr(ppd->dd, ppd->port,
"QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]);
- if ((peek[2] & 2) == 0) {
+ if ((peek[2] & 4) == 0) {
/*
* If cable is paged, rather than "flat memory", we need to
* set the page to zero, Even if it already appears to be zero.
@@ -538,7 +538,7 @@ int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n",
QSFP_DATE_LEN, cd.date);
sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n",
- QSFP_LOT_LEN, cd.date);
+ QSFP_LOT_LEN, cd.lot);
while (bidx < QSFP_DEFAULT_HDR_CNT) {
int iidx;
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 4544d6f..e6b7556 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -373,10 +373,11 @@ int qib_make_rc_req(struct qib_qp *qp)
qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
goto bail;
}
+
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
@@ -386,15 +387,15 @@ int qib_make_rc_req(struct qib_qp *qp)
len = pmtu;
break;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
qp->s_state = OP(RDMA_WRITE_ONLY);
else {
- qp->s_state =
- OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+ qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after RETH */
- ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+ ohdr->u.rc.imm_data =
+ wqe->rdma_wr.wr.ex.imm_data;
hwords += 1;
- if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
}
bth2 |= IB_BTH_REQ_ACK;
@@ -424,10 +425,11 @@ int qib_make_rc_req(struct qib_qp *qp)
qp->s_next_psn += (len - 1) / pmtu;
wqe->lpsn = qp->s_next_psn++;
}
+
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -455,24 +457,24 @@ int qib_make_rc_req(struct qib_qp *qp)
qp->s_lsn++;
wqe->lpsn = wqe->psn;
}
- if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
+ wqe->atomic_wr.swap);
ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
} else {
qp->s_state = OP(FETCH_ADD);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
ohdr->u.atomic_eth.compare_data = 0;
}
ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr >> 32);
+ wqe->atomic_wr.remote_addr >> 32);
ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr);
+ wqe->atomic_wr.remote_addr);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
+ wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
@@ -597,9 +599,9 @@ int qib_make_rc_req(struct qib_qp *qp)
*/
len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ cpu_to_be64(wqe->rdma_wr.remote_addr + len);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 22e356c..b1aa21b 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -459,8 +459,8 @@ again:
if (wqe->length == 0)
break;
if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
qp->r_sge.sg_list = NULL;
@@ -472,8 +472,8 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
release = 0;
@@ -490,18 +490,18 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->wr.wr.atomic.remote_addr,
- wqe->wr.wr.atomic.rkey,
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = wqe->wr.wr.atomic.compare_add;
+ sdata = wqe->atomic_wr.compare_add;
*(u64 *) sqp->s_sge.sge.vaddr =
- (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- sdata, wqe->wr.wr.atomic.swap);
+ sdata, wqe->atomic_wr.swap);
qib_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
@@ -785,7 +785,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index aa3a803..06a5645 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -129,9 +129,9 @@ int qib_make_uc_req(struct qib_qp *qp)
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 26243b7..59193f6 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -59,7 +59,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
u32 length;
enum ib_qp_type sqptype, dqptype;
- qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+ qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
return;
@@ -76,7 +76,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
goto drop;
}
- ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -106,8 +106,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
if (qp->ibqp.qp_num) {
u32 qkey;
- qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
- sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+ qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
+ sqp->qkey : swqe->ud_wr.remote_qkey;
if (unlikely(qkey != qp->qkey)) {
u16 lid;
@@ -210,7 +210,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num;
wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
- swqe->wr.wr.ud.pkey_index : 0;
+ swqe->ud_wr.pkey_index : 0;
wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
wc.sl = ah_attr->sl;
wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
@@ -277,7 +277,7 @@ int qib_make_ud_req(struct qib_qp *qp)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
if (ah_attr->dlid != QIB_PERMISSIVE_LID)
this_cpu_inc(ibp->pmastats->n_multicast_xmit);
@@ -363,7 +363,7 @@ int qib_make_ud_req(struct qib_qp *qp)
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
- wqe->wr.wr.ud.pkey_index : qp->s_pkey_index);
+ wqe->ud_wr.pkey_index : qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
/*
* Use the multicast QP if the destination LID is a multicast LID.
@@ -371,14 +371,14 @@ int qib_make_ud_req(struct qib_qp *qp)
ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
ah_attr->dlid != QIB_PERMISSIVE_LID ?
cpu_to_be32(QIB_MULTICAST_QPN) :
- cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
- qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+ qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 3dcc498..baf1e42 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
unsigned long flags;
struct qib_lkey_table *rkt;
struct qib_pd *pd;
+ int avoid_schedule = 0;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -362,8 +363,8 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
* undefined operations.
* Make sure buffer is large enough to hold the result for atomics.
*/
- if (wr->opcode == IB_WR_FAST_REG_MR) {
- if (qib_fast_reg_mr(qp, wr))
+ if (wr->opcode == IB_WR_REG_MR) {
+ if (qib_reg_mr(qp, reg_wr(wr)))
goto bail_inval;
} else if (qp->ibqp.qp_type == IB_QPT_UC) {
if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
@@ -374,7 +375,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
wr->opcode != IB_WR_SEND_WITH_IMM)
goto bail_inval;
/* Check UD destination address PD */
- if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
goto bail_inval;
} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
goto bail_inval;
@@ -397,7 +398,23 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
rkt = &to_idev(qp->ibqp.device)->lk_table;
pd = to_ipd(qp->ibqp.pd);
wqe = get_swqe_ptr(qp, qp->s_head);
- wqe->wr = *wr;
+
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+ else if (wr->opcode == IB_WR_REG_MR)
+ memcpy(&wqe->reg_wr, reg_wr(wr),
+ sizeof(wqe->reg_wr));
+ else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE ||
+ wr->opcode == IB_WR_RDMA_READ)
+ memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+ else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+ else
+ memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
wqe->length = 0;
j = 0;
if (wr->num_sge) {
@@ -422,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
qp->ibqp.qp_type == IB_QPT_RC) {
if (wqe->length > 0x80000000U)
goto bail_inval_free;
+ if (wqe->length <= qp->pmtu)
+ avoid_schedule = 1;
} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
- qp->port_num - 1)->ibmtu)
+ qp->port_num - 1)->ibmtu) {
goto bail_inval_free;
- else
- atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
+ } else {
+ atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
+ avoid_schedule = 1;
+ }
wqe->ssn = qp->s_ssn++;
qp->s_head = next;
@@ -442,7 +463,7 @@ bail_inval_free:
bail_inval:
ret = -EINVAL;
bail:
- if (!ret && !wr->next &&
+ if (!ret && !wr->next && !avoid_schedule &&
!qib_sdma_empty(
dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
qib_schedule_send(qp);
@@ -2240,12 +2261,10 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->poll_cq = qib_poll_cq;
ibdev->req_notify_cq = qib_req_notify_cq;
ibdev->get_dma_mr = qib_get_dma_mr;
- ibdev->reg_phys_mr = qib_reg_phys_mr;
ibdev->reg_user_mr = qib_reg_user_mr;
ibdev->dereg_mr = qib_dereg_mr;
ibdev->alloc_mr = qib_alloc_mr;
- ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
- ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
+ ibdev->map_mr_sg = qib_map_mr_sg;
ibdev->alloc_fmr = qib_alloc_fmr;
ibdev->map_phys_fmr = qib_map_phys_fmr;
ibdev->unmap_fmr = qib_unmap_fmr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index a08df70..6c5e777 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -329,6 +329,8 @@ struct qib_sge {
struct qib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
+ u64 *pages;
+ u32 npages;
struct qib_mregion mr; /* must be last */
};
@@ -338,7 +340,13 @@ struct qib_mr {
* in qp->s_max_sge.
*/
struct qib_swqe {
- struct ib_send_wr wr; /* don't use wr.sg_list */
+ union {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ struct ib_ud_wr ud_wr;
+ struct ib_reg_wr reg_wr;
+ struct ib_rdma_wr rdma_wr;
+ struct ib_atomic_wr atomic_wr;
+ };
u32 psn; /* first packet sequence number */
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
@@ -1024,10 +1032,6 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
@@ -1038,12 +1042,11 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_entries);
-struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
- struct ib_device *ibdev, int page_list_len);
-
-void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
+int qib_map_mr_sg(struct ib_mr *ibmr,
+ struct scatterlist *sg,
+ int sg_nents);
-int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr);
+int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr);
struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
index f8ea069..b2fb528 100644
--- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c
+++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
@@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct qib_ibdev *dev = to_idev(ibqp->device);
struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
struct qib_mcast *mcast = NULL;
- struct qib_mcast_qp *p, *tmp;
+ struct qib_mcast_qp *p, *tmp, *delp = NULL;
struct rb_node *n;
int last = 0;
int ret;
- if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
- ret = -EINVAL;
- goto bail;
- }
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+ return -EINVAL;
spin_lock_irq(&ibp->lock);
@@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
while (1) {
if (n == NULL) {
spin_unlock_irq(&ibp->lock);
- ret = -EINVAL;
- goto bail;
+ return -EINVAL;
}
mcast = rb_entry(n, struct qib_mcast, rb_node);
@@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
*/
list_del_rcu(&p->list);
mcast->n_attached--;
+ delp = p;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
@@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
spin_unlock_irq(&ibp->lock);
+ /* QP not attached */
+ if (!delp)
+ return -EINVAL;
+ /*
+ * Wait for any list walkers to finish before freeing the
+ * list element.
+ */
+ wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+ qib_mcast_qp_free(delp);
- if (p) {
- /*
- * Wait for any list walkers to finish before freeing the
- * list element.
- */
- wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
- qib_mcast_qp_free(p);
- }
if (last) {
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
@@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
dev->n_mcast_grps_allocated--;
spin_unlock_irq(&dev->n_mcast_grps_lock);
}
-
- ret = 0;
-
-bail:
- return ret;
+ return 0;
}
int qib_mcast_tree_empty(struct qib_ibport *ibp)
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index 5e55b8b..92dc66c 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -157,8 +157,9 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
qp_flow,
&flowinfo_ops);
if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
- usnic_err("Failed to create dbg fs entry for flow %u\n",
- qp_flow->flow->flow_id);
+ usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
+ qp_flow->flow->flow_id,
+ PTR_ERR(qp_flow->dbgfs_dentry));
}
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 0c15bd8..565c881 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -343,16 +343,15 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
netdev = pci_get_drvdata(dev);
us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
- if (IS_ERR_OR_NULL(us_ibdev)) {
+ if (!us_ibdev) {
usnic_err("Device %s context alloc failed\n",
netdev_name(pci_get_drvdata(dev)));
- return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
+ return ERR_PTR(-EFAULT);
}
us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
- if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
- usnic_err("Failed to alloc ufdev for %s with err %ld\n",
- pci_name(dev), PTR_ERR(us_ibdev->ufdev));
+ if (!us_ibdev->ufdev) {
+ usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev));
goto err_dealloc;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index 704d087..5b0248a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -236,8 +236,8 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
/* Create Flow Handle */
qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
- if (IS_ERR_OR_NULL(qp_flow)) {
- err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+ if (!qp_flow) {
+ err = -ENOMEM;
goto out_dealloc_flow;
}
qp_flow->flow = flow;
@@ -311,8 +311,8 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
/* Create qp_flow */
qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
- if (IS_ERR_OR_NULL(qp_flow)) {
- err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+ if (!qp_flow) {
+ err = -ENOMEM;
goto out_dealloc_flow;
}
qp_flow->flow = flow;
@@ -521,7 +521,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
if (!status) {
qp_grp->state = new_state;
- usnic_info("Transistioned %u from %s to %s",
+ usnic_info("Transitioned %u from %s to %s",
qp_grp->grp_id,
usnic_ib_qp_grp_state_to_string(old_state),
usnic_ib_qp_grp_state_to_string(new_state));
@@ -575,7 +575,7 @@ alloc_res_chunk_list(struct usnic_vnic *vnic,
return res_chunk_list;
out_free_res:
- for (i--; i > 0; i--)
+ for (i--; i >= 0; i--)
usnic_vnic_put_resources(res_chunk_list[i]);
kfree(res_chunk_list);
return ERR_PTR(err);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index f8e3211..6cdb4d2 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -51,7 +51,7 @@
static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
{
- *fw_ver = (u64) *fw_ver_str;
+ *fw_ver = *((u64 *)fw_ver_str);
}
static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
@@ -571,20 +571,20 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp_grp = to_uqp_grp(ibqp);
- /* TODO: Future Support All States */
mutex_lock(&qp_grp->vf->pf->usdev_lock);
- if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
- } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
- } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+ if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
+ /* usnic devices only have one port */
+ status = -EINVAL;
+ goto out_unlock;
+ }
+ if (attr_mask & IB_QP_STATE) {
+ status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
} else {
- usnic_err("Unexpected combination mask: %u state: %u\n",
- attr_mask & IB_QP_STATE, attr->qp_state);
+ usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
status = -EINVAL;
}
+out_unlock:
mutex_unlock(&qp_grp->vf->pf->usdev_lock);
return status;
}
@@ -625,8 +625,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
virt_addr, length);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (IS_ERR_OR_NULL(mr))
- return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
access_flags, 0);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 414eaa5..0d9d2e6a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -43,8 +43,6 @@ int usnic_ib_query_device(struct ib_device *ibdev,
struct ib_udata *uhw);
int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
-enum rdma_protocol_type
-usnic_ib_query_protocol(struct ib_device *device, u8 port_num);
int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
index 66de93f..8875107 100644
--- a/drivers/infiniband/hw/usnic/usnic_vnic.c
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -237,7 +237,7 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
struct usnic_vnic_res *res;
int i;
- if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+ if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
return ERR_PTR(-EINVAL);
ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
@@ -247,26 +247,28 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
return ERR_PTR(-ENOMEM);
}
- ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
- if (!ret->res) {
- usnic_err("Failed to allocate resources for %s. Out of memory\n",
- usnic_vnic_pci_name(vnic));
- kfree(ret);
- return ERR_PTR(-ENOMEM);
- }
+ if (cnt > 0) {
+ ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+ if (!ret->res) {
+ usnic_err("Failed to allocate resources for %s. Out of memory\n",
+ usnic_vnic_pci_name(vnic));
+ kfree(ret);
+ return ERR_PTR(-ENOMEM);
+ }
- spin_lock(&vnic->res_lock);
- src = &vnic->chunks[type];
- for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
- res = src->res[i];
- if (!res->owner) {
- src->free_cnt--;
- res->owner = owner;
- ret->res[ret->cnt++] = res;
+ spin_lock(&vnic->res_lock);
+ src = &vnic->chunks[type];
+ for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+ res = src->res[i];
+ if (!res->owner) {
+ src->free_cnt--;
+ res->owner = owner;
+ ret->res[ret->cnt++] = res;
+ }
}
- }
- spin_unlock(&vnic->res_lock);
+ spin_unlock(&vnic->res_lock);
+ }
ret->type = type;
ret->vnic = vnic;
WARN_ON(ret->cnt != cnt);
@@ -281,14 +283,16 @@ void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
int i;
struct usnic_vnic *vnic = chunk->vnic;
- spin_lock(&vnic->res_lock);
- while ((i = --chunk->cnt) >= 0) {
- res = chunk->res[i];
- chunk->res[i] = NULL;
- res->owner = NULL;
- vnic->chunks[res->type].free_cnt++;
+ if (chunk->cnt > 0) {
+ spin_lock(&vnic->res_lock);
+ while ((i = --chunk->cnt) >= 0) {
+ res = chunk->res[i];
+ chunk->res[i] = NULL;
+ res->owner = NULL;
+ vnic->chunks[res->type].free_cnt++;
+ }
+ spin_unlock(&vnic->res_lock);
}
- spin_unlock(&vnic->res_lock);
kfree(chunk->res);
kfree(chunk);