diff options
Diffstat (limited to 'drivers/infiniband/hw')
72 files changed, 4300 insertions, 2790 deletions
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index cb78b1e..f504ba7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en error = l2t_send(tdev, skb, l2e); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) @@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) error = cxgb3_ofld_send(tdev, skb); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index cf5474a..97fbfd2 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -115,15 +115,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, case T3_SEND_WITH_SE_INV: wc->opcode = IB_WC_SEND; break; - case T3_BIND_MW: - wc->opcode = IB_WC_BIND_MW; - break; - case T3_LOCAL_INV: wc->opcode = IB_WC_LOCAL_INV; break; case T3_FAST_REGISTER: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 5c36ee2..1d04c87 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -75,37 +75,6 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, return ret; } -int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, - int shift, - int npages) -{ - u32 stag; - int ret; - - /* We could support this... */ - if (npages > mhp->attr.pbl_size) - return -ENOMEM; - - stag = mhp->attr.stag; - if (cxio_reregister_phys_mem(&rhp->rdev, - &stag, mhp->attr.pdid, - mhp->attr.perms, - mhp->attr.zbva, - mhp->attr.va_fbo, - mhp->attr.len, - shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr)) - return -ENOMEM; - - ret = iwch_finish_mem_reg(mhp, stag); - if (ret) - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - - return ret; -} - int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) { mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, @@ -130,74 +99,3 @@ int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) return cxio_write_pbl(&mhp->rhp->rdev, pages, mhp->attr.pbl_addr + (offset << 3), npages); } - -int build_phys_page_list(struct ib_phys_buf *buffer_list, - int num_phys_buf, - u64 *iova_start, - u64 *total_size, - int *npages, - int *shift, - __be64 **page_list) -{ - u64 mask; - int i, j, n; - - mask = 0; - *total_size = 0; - for (i = 0; i < num_phys_buf; ++i) { - if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) - return -EINVAL; - if (i != 0 && i != num_phys_buf - 1 && - (buffer_list[i].size & ~PAGE_MASK)) - return -EINVAL; - *total_size += buffer_list[i].size; - if (i > 0) - mask |= buffer_list[i].addr; - else - mask |= buffer_list[i].addr & PAGE_MASK; - if (i != num_phys_buf - 1) - mask |= buffer_list[i].addr + buffer_list[i].size; - else - mask |= (buffer_list[i].addr + buffer_list[i].size + - PAGE_SIZE - 1) & PAGE_MASK; - } - - if (*total_size > 0xFFFFFFFFULL) - return -ENOMEM; - - /* Find largest page shift we can use to cover buffers */ - for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) - if ((1ULL << *shift) & mask) - break; - - buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); - buffer_list[0].addr &= ~0ull << *shift; - - *npages = 0; - for (i = 0; i < num_phys_buf; ++i) - *npages += (buffer_list[i].size + - (1ULL << *shift) - 1) >> *shift; - - if (!*npages) - return -EINVAL; - - *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL); - if (!*page_list) - return -ENOMEM; - - n = 0; - for (i = 0; i < num_phys_buf; ++i) - for (j = 0; - j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; - ++j) - (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr + - ((u64) j << *shift)); - - PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n", - __func__, (unsigned long long) *iova_start, - (unsigned long long) mask, *shift, (unsigned long long) *total_size, - *npages); - - return 0; - -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 93308c4..2734820 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -458,11 +458,9 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) u32 mmid; PDBG("%s ib_mr %p\n", __func__, ib_mr); - /* There can be no memory windows */ - if (atomic_read(&ib_mr->usecnt)) - return -EINVAL; mhp = to_iwch_mr(ib_mr); + kfree(mhp->pages); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, @@ -478,24 +476,25 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) return 0; } -static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, - int acc, - u64 *iova_start) +static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) { - __be64 *page_list; - int shift; - u64 total_size; - int npages; - struct iwch_dev *rhp; - struct iwch_pd *php; + const u64 total_size = 0xffffffff; + const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK; + struct iwch_pd *php = to_iwch_pd(pd); + struct iwch_dev *rhp = php->rhp; struct iwch_mr *mhp; - int ret; + __be64 *page_list; + int shift = 26, npages, ret, i; PDBG("%s ib_pd %p\n", __func__, pd); - php = to_iwch_pd(pd); - rhp = php->rhp; + + /* + * T3 only supports 32 bits of size. + */ + if (sizeof(phys_addr_t) > 4) { + pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n"); + return ERR_PTR(-ENOTSUPP); + } mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) @@ -503,22 +502,23 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, mhp->rhp = rhp; - /* First check that we have enough alignment */ - if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { + npages = (total_size + (1ULL << shift) - 1) >> shift; + if (!npages) { ret = -EINVAL; goto err; } - if (num_phys_buf > 1 && - ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) { - ret = -EINVAL; + page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL); + if (!page_list) { + ret = -ENOMEM; goto err; } - ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, - &total_size, &npages, &shift, &page_list); - if (ret) - goto err; + for (i = 0; i < npages; i++) + page_list[i] = cpu_to_be64((u64)i << shift); + + PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n", + __func__, mask, shift, total_size, npages); ret = iwch_alloc_pbl(mhp, npages); if (ret) { @@ -535,7 +535,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, mhp->attr.zbva = 0; mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = *iova_start; + mhp->attr.va_fbo = 0; mhp->attr.page_size = shift - 12; mhp->attr.len = (u32) total_size; @@ -552,76 +552,8 @@ err_pbl: err: kfree(mhp); return ERR_PTR(ret); - } -static int iwch_reregister_phys_mem(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, - int acc, u64 * iova_start) -{ - - struct iwch_mr mh, *mhp; - struct iwch_pd *php; - struct iwch_dev *rhp; - __be64 *page_list = NULL; - int shift = 0; - u64 total_size; - int npages = 0; - int ret; - - PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd); - - /* There can be no memory windows */ - if (atomic_read(&mr->usecnt)) - return -EINVAL; - - mhp = to_iwch_mr(mr); - rhp = mhp->rhp; - php = to_iwch_pd(mr->pd); - - /* make sure we are on the same adapter */ - if (rhp != php->rhp) - return -EINVAL; - - memcpy(&mh, mhp, sizeof *mhp); - - if (mr_rereg_mask & IB_MR_REREG_PD) - php = to_iwch_pd(pd); - if (mr_rereg_mask & IB_MR_REREG_ACCESS) - mh.attr.perms = iwch_ib_to_tpt_access(acc); - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - ret = build_phys_page_list(buffer_list, num_phys_buf, - iova_start, - &total_size, &npages, - &shift, &page_list); - if (ret) - return ret; - } - - ret = iwch_reregister_mem(rhp, php, &mh, shift, npages); - kfree(page_list); - if (ret) { - return ret; - } - if (mr_rereg_mask & IB_MR_REREG_PD) - mhp->attr.pdid = php->pdid; - if (mr_rereg_mask & IB_MR_REREG_ACCESS) - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - mhp->attr.zbva = 0; - mhp->attr.va_fbo = *iova_start; - mhp->attr.page_size = shift - 12; - mhp->attr.len = (u32) total_size; - mhp->attr.pbl_size = npages; - } - - return 0; -} - - static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { @@ -725,28 +657,6 @@ err: return ERR_PTR(err); } -static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) -{ - struct ib_phys_buf bl; - u64 kva; - struct ib_mr *ibmr; - - PDBG("%s ib_pd %p\n", __func__, pd); - - /* - * T3 only supports 32 bits of size. - */ - if (sizeof(phys_addr_t) > 4) { - pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n"); - return ERR_PTR(-ENOTSUPP); - } - bl.size = 0xffffffff; - bl.addr = 0; - kva = 0; - ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva); - return ibmr; -} - static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct iwch_dev *rhp; @@ -821,6 +731,12 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, if (!mhp) goto err; + mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mhp->pages) { + ret = -ENOMEM; + goto pl_err; + } + mhp->rhp = rhp; ret = iwch_alloc_pbl(mhp, max_num_sg); if (ret) @@ -847,31 +763,34 @@ err3: err2: iwch_free_pbl(mhp); err1: + kfree(mhp->pages); +pl_err: kfree(mhp); err: return ERR_PTR(ret); } -static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( - struct ib_device *device, - int page_list_len) +static int iwch_set_page(struct ib_mr *ibmr, u64 addr) { - struct ib_fast_reg_page_list *page_list; + struct iwch_mr *mhp = to_iwch_mr(ibmr); - page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), - GFP_KERNEL); - if (!page_list) - return ERR_PTR(-ENOMEM); + if (unlikely(mhp->npages == mhp->attr.pbl_size)) + return -ENOMEM; - page_list->page_list = (u64 *)(page_list + 1); - page_list->max_page_list_len = page_list_len; + mhp->pages[mhp->npages++] = addr; - return page_list; + return 0; } -static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) +static int iwch_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - kfree(page_list); + struct iwch_mr *mhp = to_iwch_mr(ibmr); + + mhp->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page); } static int iwch_destroy_qp(struct ib_qp *ib_qp) @@ -1442,16 +1361,12 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.resize_cq = iwch_resize_cq; dev->ibdev.poll_cq = iwch_poll_cq; dev->ibdev.get_dma_mr = iwch_get_dma_mr; - dev->ibdev.reg_phys_mr = iwch_register_phys_mem; - dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem; dev->ibdev.reg_user_mr = iwch_reg_user_mr; dev->ibdev.dereg_mr = iwch_dereg_mr; dev->ibdev.alloc_mw = iwch_alloc_mw; - dev->ibdev.bind_mw = iwch_bind_mw; dev->ibdev.dealloc_mw = iwch_dealloc_mw; dev->ibdev.alloc_mr = iwch_alloc_mr; - dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; - dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; + dev->ibdev.map_mr_sg = iwch_map_mr_sg; dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; dev->ibdev.process_mad = iwch_process_mad; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 87c14b0..252c464 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -77,6 +77,8 @@ struct iwch_mr { struct iwch_dev *rhp; u64 kva; struct tpt_attributes attr; + u64 *pages; + u32 npages; }; typedef struct iwch_mw iwch_mw_handle; @@ -328,9 +330,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); -int iwch_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); int iwch_post_zb_read(struct iwch_ep *ep); @@ -339,21 +338,9 @@ void iwch_unregister_device(struct iwch_dev *dev); void stop_read_rep_timer(struct iwch_qp *qhp); int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift); -int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, - int shift, - int npages); int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); void iwch_free_pbl(struct iwch_mr *mhp); int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); -int build_phys_page_list(struct ib_phys_buf *buffer_list, - int num_phys_buf, - u64 *iova_start, - u64 *total_size, - int *npages, - int *shift, - __be64 **page_list); - #define IWCH_NODE_DESC "cxgb3 Chelsio Communications" diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index b57c0be..d939980 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -95,8 +95,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, wqe->write.reserved[0] = 0; wqe->write.reserved[1] = 0; wqe->write.reserved[2] = 0; - wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); - wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { plen = 4; @@ -137,8 +137,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, wqe->read.local_inv = 0; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); - wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr); wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); @@ -146,27 +146,28 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) { + struct iwch_mr *mhp = to_iwch_mr(wr->mr); int i; __be64 *p; - if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) + if (mhp->npages > T3_MAX_FASTREG_DEPTH) return -EINVAL; *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); - wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); - wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fastreg.stag = cpu_to_be32(wr->key); + wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length); + wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32); wqe->fastreg.va_base_lo_fbo = - cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); + cpu_to_be32(mhp->ibmr.iova & 0xffffffff); wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | - V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | + V_FR_PAGE_COUNT(mhp->npages) | + V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) | V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); + V_FR_PERMS(iwch_ib_to_tpt_access(wr->access))); p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { + for (i = 0; i < mhp->npages; i++, p++) { /* If we need a 2nd WR, then set it up */ if (i == T3_MAX_FASTREG_FRAG) { @@ -175,14 +176,14 @@ static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, Q_PTR2IDX((wq->wptr+1), wq->size_log2)); build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, + 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG, T3_EOP); p = &wqe->pbl_frag.pbl_addrs[0]; } - *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + *p = cpu_to_be64((u64)mhp->pages[i]); } - *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; + *flit_cnt = 5 + mhp->npages; if (*flit_cnt > 15) *flit_cnt = 15; return 0; @@ -414,10 +415,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.oldest_read) qhp->wq.oldest_read = sqp; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: t3_wr_opcode = T3_WR_FASTREG; - err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, - &wr_cnt, &qhp->wq); + err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, + &wr_cnt, &qhp->wq); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -525,88 +526,6 @@ out: return err; } -int iwch_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - struct iwch_dev *rhp; - struct iwch_mw *mhp; - struct iwch_qp *qhp; - union t3_wr *wqe; - u32 pbl_addr; - u8 page_size; - u32 num_wrs; - unsigned long flag; - struct ib_sge sgl; - int err=0; - enum t3_wr_flags t3_wr_flags; - u32 idx; - struct t3_swsq *sqp; - - qhp = to_iwch_qp(qp); - mhp = to_iwch_mw(mw); - rhp = qhp->rhp; - - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - return -EINVAL; - } - num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, - qhp->wq.sq_size_log2); - if (num_wrs == 0) { - spin_unlock_irqrestore(&qhp->lock, flag); - return -ENOMEM; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx, - mw, mw_bind); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - - t3_wr_flags = 0; - if (mw_bind->send_flags & IB_SEND_SIGNALED) - t3_wr_flags = T3_COMPLETION_FLAG; - - sgl.addr = mw_bind->bind_info.addr; - sgl.lkey = mw_bind->bind_info.mr->lkey; - sgl.length = mw_bind->bind_info.length; - wqe->bind.reserved = 0; - wqe->bind.type = TPT_VATO; - - /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_tpt_bind_access( - mw_bind->bind_info.mw_access_flags); - wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey); - wqe->bind.mw_stag = cpu_to_be32(mw->rkey); - wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length); - wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr); - err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); - if (err) { - spin_unlock_irqrestore(&qhp->lock, flag); - return err; - } - wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; - sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - sqp->wr_id = mw_bind->wr_id; - sqp->opcode = T3_BIND_MW; - sqp->sq_wptr = qhp->wq.sq_wptr; - sqp->complete = 0; - sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); - wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr); - wqe->bind.mr_pagesz = page_size; - build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, - sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP); - ++(qhp->wq.wptr); - ++(qhp->wq.sq_wptr); - spin_unlock_irqrestore(&qhp->lock, flag); - - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - - return err; -} - static inline void build_term_codes(struct respQ_msg_t *rsp_msg, u8 *layer_type, u8 *ecode) { diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 9a389a0..cd2ff5f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -632,22 +632,18 @@ static void best_mtu(const unsigned short *mtus, unsigned short mtu, static int send_connect(struct c4iw_ep *ep) { - struct cpl_act_open_req *req; - struct cpl_t5_act_open_req *t5_req; - struct cpl_act_open_req6 *req6; - struct cpl_t5_act_open_req6 *t5_req6; + struct cpl_act_open_req *req = NULL; + struct cpl_t5_act_open_req *t5req = NULL; + struct cpl_t6_act_open_req *t6req = NULL; + struct cpl_act_open_req6 *req6 = NULL; + struct cpl_t5_act_open_req6 *t5req6 = NULL; + struct cpl_t6_act_open_req6 *t6req6 = NULL; struct sk_buff *skb; u64 opt0; u32 opt2; unsigned int mtu_idx; int wscale; - int wrlen; - int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? - sizeof(struct cpl_act_open_req) : - sizeof(struct cpl_t5_act_open_req); - int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? - sizeof(struct cpl_act_open_req6) : - sizeof(struct cpl_t5_act_open_req6); + int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.mapped_local_addr; struct sockaddr_in *ra = (struct sockaddr_in *) @@ -656,8 +652,28 @@ static int send_connect(struct c4iw_ep *ep) &ep->com.mapped_local_addr; struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr; - int win; int ret; + enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; + u32 isn = (prandom_u32() & ~7UL) - 1; + + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + sizev4 = sizeof(struct cpl_act_open_req); + sizev6 = sizeof(struct cpl_act_open_req6); + break; + case CHELSIO_T5: + sizev4 = sizeof(struct cpl_t5_act_open_req); + sizev6 = sizeof(struct cpl_t5_act_open_req6); + break; + case CHELSIO_T6: + sizev4 = sizeof(struct cpl_t6_act_open_req); + sizev6 = sizeof(struct cpl_t6_act_open_req6); + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + return -EINVAL; + } wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? roundup(sizev4, 16) : @@ -706,7 +722,10 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= SACK_EN_F; if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN_F; - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { + if (peer2peer) + isn += 4; + opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); opt2 |= T5_ISS_F; @@ -718,102 +737,109 @@ static int send_connect(struct c4iw_ep *ep) t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); - if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { - if (ep->com.remote_addr.ss_family == AF_INET) { - req = (struct cpl_act_open_req *) skb_put(skb, wrlen); + if (ep->com.remote_addr.ss_family == AF_INET) { + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + req = (struct cpl_act_open_req *)skb_put(skb, wrlen); INIT_TP_WR(req, 0); - OPCODE_TID(req) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ, - ((ep->rss_qid << 14) | ep->atid))); - req->local_port = la->sin_port; - req->peer_port = ra->sin_port; - req->local_ip = la->sin_addr.s_addr; - req->peer_ip = ra->sin_addr.s_addr; - req->opt0 = cpu_to_be64(opt0); + break; + case CHELSIO_T5: + t5req = (struct cpl_t5_act_open_req *)skb_put(skb, + wrlen); + INIT_TP_WR(t5req, 0); + req = (struct cpl_act_open_req *)t5req; + break; + case CHELSIO_T6: + t6req = (struct cpl_t6_act_open_req *)skb_put(skb, + wrlen); + INIT_TP_WR(t6req, 0); + req = (struct cpl_act_open_req *)t6req; + t5req = (struct cpl_t5_act_open_req *)t6req; + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + ret = -EINVAL; + goto clip_release; + } + + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid<<14) | ep->atid))); + req->local_port = la->sin_port; + req->peer_port = ra->sin_port; + req->local_ip = la->sin_addr.s_addr; + req->peer_ip = ra->sin_addr.s_addr; + req->opt0 = cpu_to_be64(opt0); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { req->params = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); req->opt2 = cpu_to_be32(opt2); } else { + t5req->params = cpu_to_be64(FILTER_TUPLE_V( + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t))); + t5req->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, t5req->rsvd); + t5req->opt2 = cpu_to_be32(opt2); + } + } else { + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen); - INIT_TP_WR(req6, 0); - OPCODE_TID(req6) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, - ((ep->rss_qid<<14)|ep->atid))); - req6->local_port = la6->sin6_port; - req6->peer_port = ra6->sin6_port; - req6->local_ip_hi = *((__be64 *) - (la6->sin6_addr.s6_addr)); - req6->local_ip_lo = *((__be64 *) - (la6->sin6_addr.s6_addr + 8)); - req6->peer_ip_hi = *((__be64 *) - (ra6->sin6_addr.s6_addr)); - req6->peer_ip_lo = *((__be64 *) - (ra6->sin6_addr.s6_addr + 8)); - req6->opt0 = cpu_to_be64(opt0); + break; + case CHELSIO_T5: + t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb, + wrlen); + INIT_TP_WR(t5req6, 0); + req6 = (struct cpl_act_open_req6 *)t5req6; + break; + case CHELSIO_T6: + t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb, + wrlen); + INIT_TP_WR(t6req6, 0); + req6 = (struct cpl_act_open_req6 *)t6req6; + t5req6 = (struct cpl_t5_act_open_req6 *)t6req6; + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + ret = -EINVAL; + goto clip_release; + } + + OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + ((ep->rss_qid<<14)|ep->atid))); + req6->local_port = la6->sin6_port; + req6->peer_port = ra6->sin6_port; + req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr)); + req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8)); + req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr)); + req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8)); + req6->opt0 = cpu_to_be64(opt0); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { req6->params = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); req6->opt2 = cpu_to_be32(opt2); - } - } else { - u32 isn = (prandom_u32() & ~7UL) - 1; - - if (peer2peer) - isn += 4; - - if (ep->com.remote_addr.ss_family == AF_INET) { - t5_req = (struct cpl_t5_act_open_req *) - skb_put(skb, wrlen); - INIT_TP_WR(t5_req, 0); - OPCODE_TID(t5_req) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ, - ((ep->rss_qid << 14) | ep->atid))); - t5_req->local_port = la->sin_port; - t5_req->peer_port = ra->sin_port; - t5_req->local_ip = la->sin_addr.s_addr; - t5_req->peer_ip = ra->sin_addr.s_addr; - t5_req->opt0 = cpu_to_be64(opt0); - t5_req->params = cpu_to_be64(FILTER_TUPLE_V( - cxgb4_select_ntuple( - ep->com.dev->rdev.lldi.ports[0], - ep->l2t))); - t5_req->rsvd = cpu_to_be32(isn); - PDBG("%s snd_isn %u\n", __func__, - be32_to_cpu(t5_req->rsvd)); - t5_req->opt2 = cpu_to_be32(opt2); } else { - t5_req6 = (struct cpl_t5_act_open_req6 *) - skb_put(skb, wrlen); - INIT_TP_WR(t5_req6, 0); - OPCODE_TID(t5_req6) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, - ((ep->rss_qid<<14)|ep->atid))); - t5_req6->local_port = la6->sin6_port; - t5_req6->peer_port = ra6->sin6_port; - t5_req6->local_ip_hi = *((__be64 *) - (la6->sin6_addr.s6_addr)); - t5_req6->local_ip_lo = *((__be64 *) - (la6->sin6_addr.s6_addr + 8)); - t5_req6->peer_ip_hi = *((__be64 *) - (ra6->sin6_addr.s6_addr)); - t5_req6->peer_ip_lo = *((__be64 *) - (ra6->sin6_addr.s6_addr + 8)); - t5_req6->opt0 = cpu_to_be64(opt0); - t5_req6->params = cpu_to_be64(FILTER_TUPLE_V( - cxgb4_select_ntuple( + t5req6->params = cpu_to_be64(FILTER_TUPLE_V( + cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t))); - t5_req6->rsvd = cpu_to_be32(isn); - PDBG("%s snd_isn %u\n", __func__, - be32_to_cpu(t5_req6->rsvd)); - t5_req6->opt2 = cpu_to_be32(opt2); + t5req6->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd); + t5req6->opt2 = cpu_to_be32(opt2); } } set_bit(ACT_OPEN_REQ, &ep->com.history); ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +clip_release: if (ret && ep->com.remote_addr.ss_family == AF_INET6) cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&la6->sin6_addr.s6_addr, 1); @@ -1196,6 +1222,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) if ((status == 0) || (status == -ECONNREFUSED)) { if (!ep->tried_with_mpa_v1) { /* this means MPA_v2 is used */ + event.ord = ep->ird; + event.ird = ep->ord; event.private_data_len = ep->plen - sizeof(struct mpa_v2_conn_params); event.private_data = ep->mpa_pkt + @@ -1203,6 +1231,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) sizeof(struct mpa_v2_conn_params); } else { /* this means MPA_v1 is used */ + event.ord = cur_max_read_depth(ep->com.dev); + event.ird = cur_max_read_depth(ep->com.dev); event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); @@ -1265,8 +1295,8 @@ static void established_upcall(struct c4iw_ep *ep) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; - event.ird = ep->ird; - event.ord = ep->ord; + event.ird = ep->ord; + event.ord = ep->ird; if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); @@ -1898,7 +1928,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, struct dst_entry *dst, struct c4iw_dev *cdev, - bool clear_mpa_v1) + bool clear_mpa_v1, enum chip_type adapter_type) { struct neighbour *n; int err, step; @@ -1933,7 +1963,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, + cxgb4_port_viid(pdev)); step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -1952,7 +1983,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; ep->mtu = dst_mtu(dst); ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, + cxgb4_port_viid(pdev)); step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -2025,7 +2057,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) err = -EHOSTUNREACH; goto fail3; } - err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false); + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, + ep->com.dev->rdev.lldi.adapter_type); if (err) { pr_err("%s - cannot alloc l2e.\n", __func__); goto fail4; @@ -2213,13 +2246,14 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, int wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; + enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(skb_cloned(skb)); skb_get(skb); rpl = cplhdr(skb); - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (!is_t4(adapter_type)) { skb_trim(skb, roundup(sizeof(*rpl5), 16)); rpl5 = (void *)rpl; INIT_TP_WR(rpl5, ep->hwtid); @@ -2266,12 +2300,16 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, const struct tcphdr *tcph; u32 hlen = ntohl(req->hdr_len); - tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + - IP_HDR_LEN_G(hlen); + if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5) + tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + + IP_HDR_LEN_G(hlen); + else + tcph = (const void *)(req + 1) + + T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen); if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); } - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); @@ -2302,12 +2340,16 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) return; } -static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype, - __u8 *local_ip, __u8 *peer_ip, +static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, + int *iptype, __u8 *local_ip, __u8 *peer_ip, __be16 *local_port, __be16 *peer_port) { - int eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); struct tcphdr *tcp = (struct tcphdr *) @@ -2362,7 +2404,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port); + get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, + local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { @@ -2397,7 +2440,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - err = import_ep(child_ep, iptype, peer_ip, dst, dev, false); + err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, + parent_ep->com.dev->rdev.lldi.adapter_type); if (err) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -2929,7 +2973,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } else { if (peer2peer && (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && - (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0) + (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0) ep->ird = 1; } @@ -3189,7 +3233,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail2; } - err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true); + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, + ep->com.dev->rdev.lldi.adapter_type); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); goto fail3; @@ -3226,6 +3271,12 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &ep->com.mapped_local_addr; + if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) { + err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + if (err) + return err; + } c4iw_init_wr_wait(&ep->com.wr_wait); err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], ep->stid, &sin6->sin6_addr, @@ -3237,13 +3288,13 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) 0, 0, __func__); else if (err > 0) err = net_xmit_errno(err); - if (err) + if (err) { + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", err, ep->stid, sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port)); - else - cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], - (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } return err; } @@ -3260,6 +3311,10 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) sin->sin_addr.s_addr, sin->sin_port, 0, ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); if (err == -EBUSY) { + if (c4iw_fatal_error(&ep->com.dev->rdev)) { + err = -EIO; + break; + } set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(usecs_to_jiffies(100)); } @@ -3593,20 +3648,23 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) { - u32 l2info; - u16 vlantag, len, hdr_len, eth_hdr_len; + __be32 l2info; + __be16 hdr_len, vlantag, len; + u16 eth_hdr_len; + int tcp_hdr_len, ip_hdr_len; u8 intf; struct cpl_rx_pkt *cpl = cplhdr(skb); struct cpl_pass_accept_req *req; struct tcp_options_received tmp_opt; struct c4iw_dev *dev; + enum chip_type type; dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); /* Store values from cpl_rx_pkt in temporary location. */ - vlantag = (__force u16) cpl->vlan; - len = (__force u16) cpl->len; - l2info = (__force u32) cpl->l2info; - hdr_len = (__force u16) cpl->hdr_len; + vlantag = cpl->vlan; + len = cpl->len; + l2info = cpl->l2info; + hdr_len = cpl->hdr_len; intf = cpl->iff; __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); @@ -3623,20 +3681,28 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) memset(req, 0, sizeof(*req)); req->l2info = cpu_to_be16(SYN_INTF_V(intf) | SYN_MAC_IDX_V(RX_MACIDX_G( - (__force int) htonl(l2info))) | + be32_to_cpu(l2info))) | SYN_XACT_MATCH_F); - eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? - RX_ETHHDR_LEN_G((__force int)htonl(l2info)) : - RX_T5_ETHHDR_LEN_G((__force int)htonl(l2info)); - req->hdr_len = cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G( - (__force int) htonl(l2info))) | - TCP_HDR_LEN_V(RX_TCPHDR_LEN_G( - (__force int) htons(hdr_len))) | - IP_HDR_LEN_V(RX_IPHDR_LEN_G( - (__force int) htons(hdr_len))) | - ETH_HDR_LEN_V(RX_ETHHDR_LEN_G(eth_hdr_len))); - req->vlan = (__force __be16) vlantag; - req->len = (__force __be16) len; + type = dev->rdev.lldi.adapter_type; + tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len)); + ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len)); + req->hdr_len = + cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info)))); + if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) { + eth_hdr_len = is_t4(type) ? + RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) : + RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info)); + req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) | + IP_HDR_LEN_V(ip_hdr_len) | + ETH_HDR_LEN_V(eth_hdr_len)); + } else { /* T6 and later */ + eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info)); + req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) | + T6_IP_HDR_LEN_V(ip_hdr_len) | + T6_ETH_HDR_LEN_V(eth_hdr_len)); + } + req->vlan = vlantag; + req->len = len; req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) | PASS_OPEN_TOS_V(tos)); req->tcpopt.mss = htons(tmp_opt.mss_clamp); @@ -3755,9 +3821,22 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? - RX_ETHHDR_LEN_G(htonl(cpl->l2info)) : - RX_T5_ETHHDR_LEN_G(htonl(cpl->l2info)); + switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) { + case CHELSIO_T4: + eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + case CHELSIO_T5: + eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + case CHELSIO_T6: + eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)); + goto reject; + } + if (eth_hdr_len == ETH_HLEN) { eh = (struct ethhdr *)(req + 1); iph = (struct iphdr *)(eh + 1); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 92d5183..cf21df4 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -744,15 +744,12 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) case FW_RI_SEND_WITH_SE: wc->opcode = IB_WC_SEND; break; - case FW_RI_BIND_MW: - wc->opcode = IB_WC_BIND_MW; - break; case FW_RI_LOCAL_INV: wc->opcode = IB_WC_LOCAL_INV; break; case FW_RI_FAST_REGISTER: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 1a29739..8024ea4 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -315,14 +315,12 @@ static int qp_release(struct inode *inode, struct file *file) static int qp_open(struct inode *inode, struct file *file) { struct c4iw_debugfs_data *qpd; - int ret = 0; int count = 1; qpd = kmalloc(sizeof *qpd, GFP_KERNEL); - if (!qpd) { - ret = -ENOMEM; - goto out; - } + if (!qpd) + return -ENOMEM; + qpd->devp = inode->i_private; qpd->pos = 0; @@ -333,8 +331,8 @@ static int qp_open(struct inode *inode, struct file *file) qpd->bufsize = count * 128; qpd->buf = vmalloc(qpd->bufsize); if (!qpd->buf) { - ret = -ENOMEM; - goto err1; + kfree(qpd); + return -ENOMEM; } spin_lock_irq(&qpd->devp->lock); @@ -343,11 +341,7 @@ static int qp_open(struct inode *inode, struct file *file) qpd->buf[qpd->pos++] = 0; file->private_data = qpd; - goto out; -err1: - kfree(qpd); -out: - return ret; + return 0; } static const struct file_operations qp_debugfs_fops = { @@ -781,8 +775,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n", pci_name(rdev->lldi.pdev), rdev->lldi.udb_density, rdev->lldi.ucq_density); - err = -EINVAL; - goto err1; + return -EINVAL; } if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start || rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) { @@ -791,8 +784,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start, rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size, rdev->lldi.vr->cq.size); - err = -EINVAL; - goto err1; + return -EINVAL; } rdev->qpmask = rdev->lldi.udb_density - 1; @@ -816,10 +808,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.db_reg, rdev->lldi.gts_reg, rdev->qpmask, rdev->cqmask); - if (c4iw_num_stags(rdev) == 0) { - err = -EINVAL; - goto err1; - } + if (c4iw_num_stags(rdev) == 0) + return -EINVAL; rdev->stats.pd.total = T4_MAX_NUM_PD; rdev->stats.stag.total = rdev->lldi.vr->stag.size; @@ -831,29 +821,31 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); if (err) { printk(KERN_ERR MOD "error %d initializing resources\n", err); - goto err1; + return err; } err = c4iw_pblpool_create(rdev); if (err) { printk(KERN_ERR MOD "error %d initializing pbl pool\n", err); - goto err2; + goto destroy_resource; } err = c4iw_rqtpool_create(rdev); if (err) { printk(KERN_ERR MOD "error %d initializing rqt pool\n", err); - goto err3; + goto destroy_pblpool; } err = c4iw_ocqp_pool_create(rdev); if (err) { printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); - goto err4; + goto destroy_rqtpool; } rdev->status_page = (struct t4_dev_status_page *) __get_free_page(GFP_KERNEL); - if (!rdev->status_page) { - pr_err(MOD "error allocating status page\n"); - goto err4; - } + if (!rdev->status_page) + goto destroy_ocqp_pool; + rdev->status_page->qp_start = rdev->lldi.vr->qp.start; + rdev->status_page->qp_size = rdev->lldi.vr->qp.size; + rdev->status_page->cq_start = rdev->lldi.vr->cq.start; + rdev->status_page->cq_size = rdev->lldi.vr->cq.size; if (c4iw_wr_log) { rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) * @@ -869,13 +861,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->status_page->db_off = 0; return 0; -err4: +destroy_ocqp_pool: + c4iw_ocqp_pool_destroy(rdev); +destroy_rqtpool: c4iw_rqtpool_destroy(rdev); -err3: +destroy_pblpool: c4iw_pblpool_destroy(rdev); -err2: +destroy_resource: c4iw_destroy_resource(&rdev->resource); -err1: return err; } @@ -962,12 +955,12 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.lldi.sge_egrstatuspagesize; /* - * For T5 devices, we map all of BAR2 with WC. + * For T5/T6 devices, we map all of BAR2 with WC. * For T4 devices with onchip qp mem, we map only that part * of BAR2 with WC. */ devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); - if (is_t5(devp->rdev.lldi.adapter_type)) { + if (!is_t4(devp->rdev.lldi.adapter_type)) { devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, pci_resource_len(devp->rdev.lldi.pdev, 2)); if (!devp->rdev.bar2_kva) { @@ -1267,11 +1260,9 @@ static int enable_qp_db(int id, void *p, void *data) static void resume_rc_qp(struct c4iw_qp *qp) { spin_lock(&qp->lock); - t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, - is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL); qp->wq.sq.wq_pidx_inc = 0; - t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, - is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL); qp->wq.rq.wq_pidx_inc = 0; spin_unlock(&qp->lock); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index c7bb38c..fb2de75 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -386,6 +386,10 @@ struct c4iw_mr { struct c4iw_dev *rhp; u64 kva; struct tpt_attributes attr; + u64 *mpl; + dma_addr_t mpl_addr; + u32 max_mpl_len; + u32 mpl_len; }; static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) @@ -405,20 +409,6 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) return container_of(ibmw, struct c4iw_mw, ibmw); } -struct c4iw_fr_page_list { - struct ib_fast_reg_page_list ibpl; - DEFINE_DMA_UNMAP_ADDR(mapping); - dma_addr_t dma_addr; - struct c4iw_dev *dev; - int pll_len; -}; - -static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( - struct ib_fast_reg_page_list *ibpl) -{ - return container_of(ibpl, struct c4iw_fr_page_list, ibpl); -} - struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; @@ -957,8 +947,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); -int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, - struct ib_mw_bind *mw_bind); int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); int c4iw_destroy_listen(struct iw_cm_id *cm_id); @@ -966,30 +954,18 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); -void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list); -struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( - struct ib_device *device, - int page_list_len); struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); +int c4iw_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); int c4iw_dealloc_mw(struct ib_mw *mw); struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); -struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, - int acc, - u64 *iova_start); -int c4iw_reregister_phys_mem(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, - int acc, u64 *iova_start); int c4iw_dereg_mr(struct ib_mr *ib_mr); int c4iw_destroy_cq(struct ib_cq *ib_cq); struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 026b91e..7849890 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F); - req->wr.wr_lo = (__force __be64)&wr_wait; + req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( @@ -392,32 +392,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, return ret; } -static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, - struct c4iw_mr *mhp, int shift, int npages) -{ - u32 stag; - int ret; - - if (npages > mhp->attr.pbl_size) - return -ENOMEM; - - stag = mhp->attr.stag; - ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid, - FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, mhp->attr.zbva, - mhp->attr.va_fbo, mhp->attr.len, shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr); - if (ret) - return ret; - - ret = finish_mem_reg(mhp, stag); - if (ret) - dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - - return ret; -} - static int alloc_pbl(struct c4iw_mr *mhp, int npages) { mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev, @@ -431,228 +405,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages) return 0; } -static int build_phys_page_list(struct ib_phys_buf *buffer_list, - int num_phys_buf, u64 *iova_start, - u64 *total_size, int *npages, - int *shift, __be64 **page_list) -{ - u64 mask; - int i, j, n; - - mask = 0; - *total_size = 0; - for (i = 0; i < num_phys_buf; ++i) { - if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) - return -EINVAL; - if (i != 0 && i != num_phys_buf - 1 && - (buffer_list[i].size & ~PAGE_MASK)) - return -EINVAL; - *total_size += buffer_list[i].size; - if (i > 0) - mask |= buffer_list[i].addr; - else - mask |= buffer_list[i].addr & PAGE_MASK; - if (i != num_phys_buf - 1) - mask |= buffer_list[i].addr + buffer_list[i].size; - else - mask |= (buffer_list[i].addr + buffer_list[i].size + - PAGE_SIZE - 1) & PAGE_MASK; - } - - if (*total_size > 0xFFFFFFFFULL) - return -ENOMEM; - - /* Find largest page shift we can use to cover buffers */ - for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) - if ((1ULL << *shift) & mask) - break; - - buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); - buffer_list[0].addr &= ~0ull << *shift; - - *npages = 0; - for (i = 0; i < num_phys_buf; ++i) - *npages += (buffer_list[i].size + - (1ULL << *shift) - 1) >> *shift; - - if (!*npages) - return -EINVAL; - - *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL); - if (!*page_list) - return -ENOMEM; - - n = 0; - for (i = 0; i < num_phys_buf; ++i) - for (j = 0; - j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; - ++j) - (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr + - ((u64) j << *shift)); - - PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n", - __func__, (unsigned long long)*iova_start, - (unsigned long long)mask, *shift, (unsigned long long)*total_size, - *npages); - - return 0; - -} - -int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask, - struct ib_pd *pd, struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start) -{ - - struct c4iw_mr mh, *mhp; - struct c4iw_pd *php; - struct c4iw_dev *rhp; - __be64 *page_list = NULL; - int shift = 0; - u64 total_size; - int npages; - int ret; - - PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd); - - /* There can be no memory windows */ - if (atomic_read(&mr->usecnt)) - return -EINVAL; - - mhp = to_c4iw_mr(mr); - rhp = mhp->rhp; - php = to_c4iw_pd(mr->pd); - - /* make sure we are on the same adapter */ - if (rhp != php->rhp) - return -EINVAL; - - memcpy(&mh, mhp, sizeof *mhp); - - if (mr_rereg_mask & IB_MR_REREG_PD) - php = to_c4iw_pd(pd); - if (mr_rereg_mask & IB_MR_REREG_ACCESS) { - mh.attr.perms = c4iw_ib_to_tpt_access(acc); - mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) == - IB_ACCESS_MW_BIND; - } - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - ret = build_phys_page_list(buffer_list, num_phys_buf, - iova_start, - &total_size, &npages, - &shift, &page_list); - if (ret) - return ret; - } - - if (mr_exceeds_hw_limits(rhp, total_size)) { - kfree(page_list); - return -EINVAL; - } - - ret = reregister_mem(rhp, php, &mh, shift, npages); - kfree(page_list); - if (ret) - return ret; - if (mr_rereg_mask & IB_MR_REREG_PD) - mhp->attr.pdid = php->pdid; - if (mr_rereg_mask & IB_MR_REREG_ACCESS) - mhp->attr.perms = c4iw_ib_to_tpt_access(acc); - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - mhp->attr.zbva = 0; - mhp->attr.va_fbo = *iova_start; - mhp->attr.page_size = shift - 12; - mhp->attr.len = (u32) total_size; - mhp->attr.pbl_size = npages; - } - - return 0; -} - -struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start) -{ - __be64 *page_list; - int shift; - u64 total_size; - int npages; - struct c4iw_dev *rhp; - struct c4iw_pd *php; - struct c4iw_mr *mhp; - int ret; - - PDBG("%s ib_pd %p\n", __func__, pd); - php = to_c4iw_pd(pd); - rhp = php->rhp; - - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - /* First check that we have enough alignment */ - if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { - ret = -EINVAL; - goto err; - } - - if (num_phys_buf > 1 && - ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) { - ret = -EINVAL; - goto err; - } - - ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, - &total_size, &npages, &shift, - &page_list); - if (ret) - goto err; - - if (mr_exceeds_hw_limits(rhp, total_size)) { - kfree(page_list); - ret = -EINVAL; - goto err; - } - - ret = alloc_pbl(mhp, npages); - if (ret) { - kfree(page_list); - goto err; - } - - ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr, - npages); - kfree(page_list); - if (ret) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - - mhp->attr.perms = c4iw_ib_to_tpt_access(acc); - mhp->attr.va_fbo = *iova_start; - mhp->attr.page_size = shift - 12; - - mhp->attr.len = (u32) total_size; - mhp->attr.pbl_size = npages; - ret = register_mem(rhp, php, mhp, shift); - if (ret) - goto err_pbl; - - return &mhp->ibmr; - -err_pbl: - c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, - mhp->attr.pbl_size << 3); - -err: - kfree(mhp); - return ERR_PTR(ret); - -} - struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) { struct c4iw_dev *rhp; @@ -863,6 +615,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, u32 mmid; u32 stag = 0; int ret = 0; + int length = roundup(max_num_sg * sizeof(u64), 32); if (mr_type != IB_MR_TYPE_MEM_REG || max_num_sg > t4_max_fr_depth(use_dsgl)) @@ -876,6 +629,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, goto err; } + mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev, + length, &mhp->mpl_addr, GFP_KERNEL); + if (!mhp->mpl) { + ret = -ENOMEM; + goto err_mpl; + } + mhp->max_mpl_len = length; + mhp->rhp = rhp; ret = alloc_pbl(mhp, max_num_sg); if (ret) @@ -905,54 +666,35 @@ err2: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); err1: + dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); +err_mpl: kfree(mhp); err: return ERR_PTR(ret); } -struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, - int page_list_len) +static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) { - struct c4iw_fr_page_list *c4pl; - struct c4iw_dev *dev = to_c4iw_dev(device); - dma_addr_t dma_addr; - int pll_len = roundup(page_list_len * sizeof(u64), 32); - - c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL); - if (!c4pl) - return ERR_PTR(-ENOMEM); + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); - c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, - pll_len, &dma_addr, - GFP_KERNEL); - if (!c4pl->ibpl.page_list) { - kfree(c4pl); - return ERR_PTR(-ENOMEM); - } - dma_unmap_addr_set(c4pl, mapping, dma_addr); - c4pl->dma_addr = dma_addr; - c4pl->dev = dev; - c4pl->pll_len = pll_len; + if (unlikely(mhp->mpl_len == mhp->max_mpl_len)) + return -ENOMEM; - PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", - __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, - &c4pl->dma_addr); + mhp->mpl[mhp->mpl_len++] = addr; - return &c4pl->ibpl; + return 0; } -void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) +int c4iw_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); - PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", - __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, - &c4pl->dma_addr); + mhp->mpl_len = 0; - dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, - c4pl->pll_len, - c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); - kfree(c4pl); + return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page); } int c4iw_dereg_mr(struct ib_mr *ib_mr) @@ -962,14 +704,14 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) u32 mmid; PDBG("%s ib_mr %p\n", __func__, ib_mr); - /* There can be no memory windows */ - if (atomic_read(&ib_mr->usecnt)) - return -EINVAL; mhp = to_c4iw_mr(ib_mr); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; remove_handle(rhp, &rhp->mmidr, mmid); + if (mhp->mpl) + dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); if (mhp->attr.pbl_size) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 7746113..ec04272 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if (addr >= rdev->oc_mw_pa) vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); else { - if (is_t5(rdev->lldi.adapter_type)) + if (!is_t4(rdev->lldi.adapter_type)) vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); else @@ -549,16 +549,12 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.resize_cq = c4iw_resize_cq; dev->ibdev.poll_cq = c4iw_poll_cq; dev->ibdev.get_dma_mr = c4iw_get_dma_mr; - dev->ibdev.reg_phys_mr = c4iw_register_phys_mem; - dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem; dev->ibdev.reg_user_mr = c4iw_reg_user_mr; dev->ibdev.dereg_mr = c4iw_dereg_mr; dev->ibdev.alloc_mw = c4iw_alloc_mw; - dev->ibdev.bind_mw = c4iw_bind_mw; dev->ibdev.dealloc_mw = c4iw_dealloc_mw; dev->ibdev.alloc_mr = c4iw_alloc_mr; - dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl; - dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl; + dev->ibdev.map_mr_sg = c4iw_map_mr_sg; dev->ibdev.attach_mcast = c4iw_multicast_attach; dev->ibdev.detach_mcast = c4iw_multicast_detach; dev->ibdev.process_mad = c4iw_process_mad; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 6517e12..e99345e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -528,8 +528,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; wqe->write.r2 = 0; - wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); - wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->write.u.immd_src, wr, @@ -566,10 +566,10 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) if (wr->num_sge > 1) return -EINVAL; if (wr->num_sge) { - wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); - wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr + wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr >> 32)); - wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); + wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr); wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr @@ -605,47 +605,41 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } -static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16, u8 t5dev) +static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_reg_wr *wr, u8 *len16, u8 t5dev) { - + struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); struct fw_ri_immd *imdp; __be64 *p; int i; - int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); int rem; - if (wr->wr.fast_reg.page_list_len > - t4_max_fr_depth(use_dsgl)) + if (mhp->mpl_len > t4_max_fr_depth(use_dsgl)) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; - wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; + wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12; wqe->fr.addr_type = FW_RI_VA_BASED_TO; - wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access); wqe->fr.len_hi = 0; - wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); - wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); - wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); - wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & + wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length); + wqe->fr.stag = cpu_to_be32(wr->key); + wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { - struct c4iw_fr_page_list *c4pl = - to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); struct fw_ri_dsgl *sglp; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - wr->wr.fast_reg.page_list->page_list[i] = (__force u64) - cpu_to_be64((u64) - wr->wr.fast_reg.page_list->page_list[i]); - } + for (i = 0; i < mhp->mpl_len; i++) + mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]); sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); sglp->op = FW_RI_DATA_DSGL; sglp->r1 = 0; sglp->nsge = cpu_to_be16(1); - sglp->addr0 = cpu_to_be64(c4pl->dma_addr); + sglp->addr0 = cpu_to_be64(mhp->mpl_addr); sglp->len0 = cpu_to_be32(pbllen); *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); @@ -657,9 +651,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, imdp->immdlen = cpu_to_be32(pbllen); p = (__be64 *)(imdp + 1); rem = pbllen; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - *p = cpu_to_be64( - (u64)wr->wr.fast_reg.page_list->page_list[i]); + for (i = 0; i < mhp->mpl_len; i++) { + *p = cpu_to_be64((u64)mhp->mpl[i]); rem -= sizeof(*p); if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; @@ -712,8 +705,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) spin_lock_irqsave(&qhp->rhp->lock, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) - t4_ring_sq_db(&qhp->wq, inc, - is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_sq_db(&qhp->wq, inc, NULL); else { add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); qhp->wq.sq.wq_pidx_inc += inc; @@ -730,8 +722,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) spin_lock_irqsave(&qhp->rhp->lock, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) - t4_ring_rq_db(&qhp->wq, inc, - is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_rq_db(&qhp->wq, inc, NULL); else { add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); qhp->wq.rq.wq_pidx_inc += inc; @@ -813,13 +804,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, - is_t5( - qhp->rhp->rdev.lldi.adapter_type) ? - 1 : 0); + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, + is_t5( + qhp->rhp->rdev.lldi.adapter_type) ? + 1 : 0); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -860,8 +851,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } if (!qhp->rhp->rdev.status_page->db_off) { - t4_ring_sq_db(&qhp->wq, idx, - is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + t4_ring_sq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { spin_unlock_irqrestore(&qhp->lock, flag); @@ -934,8 +924,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs--; } if (!qhp->rhp->rdev.status_page->db_off) { - t4_ring_rq_db(&qhp->wq, idx, - is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + t4_ring_rq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { spin_unlock_irqrestore(&qhp->lock, flag); @@ -944,11 +933,6 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, return err; } -int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) -{ - return -ENOSYS; -} - static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, u8 *ecode) { @@ -1875,7 +1859,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; - if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) return -EINVAL; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 274a7ab..6126bbe 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -455,8 +455,7 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src) } } -static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, - union t4_wr *wqe) +static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) { /* Flush host queue memory writes. */ @@ -482,7 +481,7 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db); } -static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, +static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, union t4_recv_wr *wqe) { @@ -700,4 +699,11 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq) struct t4_dev_status_page { u8 db_off; + u8 pad1; + u16 pad2; + u32 pad3; + u64 qp_start; + u64 qp_size; + u64 cq_start; + u64 cq_size; }; diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index cbd0ce1..295f422 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -32,7 +32,7 @@ #ifndef __C4IW_USER_H__ #define __C4IW_USER_H__ -#define C4IW_UVERBS_ABI_VERSION 2 +#define C4IW_UVERBS_ABI_VERSION 3 /* * Make sure that all structs defined in this file remain laid out so diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 1688a17..105246f 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -76,7 +76,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; struct in6_addr in6; - u16 vlan_tag; + u16 vlan_tag = 0xffff; + union ib_gid sgid; + struct ib_gid_attr gid_attr; + int ret; memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); if (rdma_is_multicast_addr(&in6)) { @@ -85,12 +88,23 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr } else { memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN); } - vlan_tag = ah_attr->vlan_id; + ret = ib_get_cached_gid(pd->device, ah_attr->port_num, + ah_attr->grh.sgid_index, &sgid, &gid_attr); + if (ret) + return ERR_PTR(ret); + eth_zero_addr(ah->av.eth.s_mac); + if (gid_attr.ndev) { + if (is_vlan_dev(gid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); + memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(gid_attr.ndev); + } if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); ah->av.eth.vlan = cpu_to_be16(vlan_tag); + ah->av.eth.hop_limit = ah_attr->grh.hop_limit; if (ah_attr->static_rate) { ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 5fd49f9..9f8b516 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -811,14 +811,11 @@ repoll: wc->opcode = IB_WC_MASKED_FETCH_ADD; wc->byte_len = 8; break; - case MLX4_OPCODE_BIND_MW: - wc->opcode = IB_WC_BIND_MW; - break; case MLX4_OPCODE_LSO: wc->opcode = IB_WC_LSO; break; case MLX4_OPCODE_FMR: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; break; case MLX4_OPCODE_LOCAL_INVAL: wc->opcode = IB_WC_LOCAL_INV; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1cd75ff..d68f506 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -40,6 +40,7 @@ #include <linux/gfp.h> #include <rdma/ib_pma.h> +#include <linux/mlx4/driver.h> #include "mlx4_ib.h" enum { @@ -457,7 +458,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, struct ib_grh *grh, struct ib_mad *mad) { struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_ud_wr wr; + struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *tun_ctx; struct mlx4_ib_demux_pv_qp *tun_qp; struct mlx4_rcv_tunnel_mad *tun_mad; @@ -582,18 +584,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, list.length = sizeof (struct mlx4_rcv_tunnel_mad); list.lkey = tun_ctx->pd->local_dma_lkey; - wr.wr.ud.ah = ah; - wr.wr.ud.port_num = port; - wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - wr.wr.ud.remote_qpn = dqpn; - wr.next = NULL; - wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; - - ret = ib_post_send(src_qp, &wr, &bad_wr); + wr.ah = ah; + wr.port_num = port; + wr.remote_qkey = IB_QP_SET_QKEY; + wr.remote_qpn = dqpn; + wr.wr.next = NULL; + wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); + wr.wr.sg_list = &list; + wr.wr.num_sge = 1; + wr.wr.opcode = IB_WR_SEND; + wr.wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(src_qp, &wr.wr, &bad_wr); out: if (ret) ib_destroy_ah(ah); @@ -605,8 +607,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, struct ib_mad *mad) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - int err; - int slave; + int err, other_port; + int slave = -1; u8 *slave_id; int is_eth = 0; @@ -624,7 +626,17 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); return -EINVAL; } - if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) { + err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave); + if (err && mlx4_is_mf_bonded(dev->dev)) { + other_port = (port == 1) ? 2 : 1; + err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave); + if (!err) { + port = other_port; + pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n", + slave, grh->dgid.raw, port, other_port); + } + } + if (err) { mlx4_ib_warn(ibdev, "failed matching grh\n"); return -ENOENT; } @@ -805,17 +817,48 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } -static void edit_counter(struct mlx4_counter *cnt, - struct ib_pma_portcounters *pma_cnt) +static void edit_counter(struct mlx4_counter *cnt, void *counters, + __be16 attr_id) { - ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data, - (be64_to_cpu(cnt->tx_bytes) >> 2)); - ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data, - (be64_to_cpu(cnt->rx_bytes) >> 2)); - ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets, - be64_to_cpu(cnt->tx_frames)); - ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets, - be64_to_cpu(cnt->rx_frames)); + switch (attr_id) { + case IB_PMA_PORT_COUNTERS: + { + struct ib_pma_portcounters *pma_cnt = + (struct ib_pma_portcounters *)counters; + + ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data, + (be64_to_cpu(cnt->tx_bytes) >> 2)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data, + (be64_to_cpu(cnt->rx_bytes) >> 2)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets, + be64_to_cpu(cnt->tx_frames)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets, + be64_to_cpu(cnt->rx_frames)); + break; + } + case IB_PMA_PORT_COUNTERS_EXT: + { + struct ib_pma_portcounters_ext *pma_cnt_ext = + (struct ib_pma_portcounters_ext *)counters; + + pma_cnt_ext->port_xmit_data = + cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2); + pma_cnt_ext->port_rcv_data = + cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2); + pma_cnt_ext->port_xmit_packets = cnt->tx_frames; + pma_cnt_ext->port_rcv_packets = cnt->rx_frames; + break; + } + } +} + +static int iboe_process_mad_port_info(void *out_mad) +{ + struct ib_class_port_info cpi = {}; + + cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + memcpy(out_mad, &cpi, sizeof(cpi)); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -824,23 +867,38 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, { struct mlx4_counter counter_stats; struct mlx4_ib_dev *dev = to_mdev(ibdev); - int err; + struct counter_index *tmp_counter; + int err = IB_MAD_RESULT_FAILURE, stats_avail = 0; if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) return -EINVAL; + if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) + return iboe_process_mad_port_info((void *)(out_mad->data + 40)); + memset(&counter_stats, 0, sizeof(counter_stats)); - err = mlx4_get_counter_stats(dev->dev, - dev->counters[port_num - 1].index, - &counter_stats, 0); - if (err) - err = IB_MAD_RESULT_FAILURE; - else { + mutex_lock(&dev->counters_table[port_num - 1].mutex); + list_for_each_entry(tmp_counter, + &dev->counters_table[port_num - 1].counters_list, + list) { + err = mlx4_get_counter_stats(dev->dev, + tmp_counter->index, + &counter_stats, 0); + if (err) { + err = IB_MAD_RESULT_FAILURE; + stats_avail = 0; + break; + } + stats_avail = 1; + } + mutex_unlock(&dev->counters_table[port_num - 1].mutex); + if (stats_avail) { memset(out_mad->data, 0, sizeof out_mad->data); switch (counter_stats.counter_mode & 0xf) { case 0: edit_counter(&counter_stats, - (void *)(out_mad->data + 40)); + (void *)(out_mad->data + 40), + in_mad->mad_hdr.attr_id); err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; break; default: @@ -871,8 +929,10 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, */ if (link == IB_LINK_LAYER_INFINIBAND) { if (mlx4_is_slave(dev->dev) && - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS) + (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || + in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || + in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, in_mad, out_mad); @@ -1172,10 +1232,11 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, - u8 *s_mac, struct ib_mad *mad) + u8 *s_mac, u16 vlan_id, struct ib_mad *mad) { struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_ud_wr wr; + struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *sqp_ctx; struct mlx4_ib_demux_pv_qp *sqp; struct mlx4_mad_snd_buf *sqp_mad; @@ -1246,22 +1307,25 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, list.length = sizeof (struct mlx4_mad_snd_buf); list.lkey = sqp_ctx->pd->local_dma_lkey; - wr.wr.ud.ah = ah; - wr.wr.ud.port_num = port; - wr.wr.ud.pkey_index = wire_pkey_ix; - wr.wr.ud.remote_qkey = qkey; - wr.wr.ud.remote_qpn = remote_qpn; - wr.next = NULL; - wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; + wr.ah = ah; + wr.port_num = port; + wr.pkey_index = wire_pkey_ix; + wr.remote_qkey = qkey; + wr.remote_qpn = remote_qpn; + wr.wr.next = NULL; + wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); + wr.wr.sg_list = &list; + wr.wr.num_sge = 1; + wr.wr.opcode = IB_WR_SEND; + wr.wr.send_flags = IB_SEND_SIGNALED; if (s_mac) memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + if (vlan_id < 0x1000) + vlan_id |= (attr->sl & 7) << 13; + to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id); - ret = ib_post_send(send_qp, &wr, &bad_wr); + ret = ib_post_send(send_qp, &wr.wr, &bad_wr); out: if (ret) ib_destroy_ah(ah); @@ -1295,6 +1359,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc u8 *slave_id; int slave; int port; + u16 vlan_id; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1383,10 +1448,10 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); - ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); + vlan_id = be16_to_cpu(tunnel->hdr.vlan); /* if slave have default vlan use it */ mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, - &ah_attr.vlan_id, &ah_attr.sl); + &vlan_id, &ah_attr.sl); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? @@ -1394,7 +1459,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, wc->smac, &tunnel->mad); + &ah_attr, wc->smac, vlan_id, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index efecdf0..1c7ab6c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n return dev; } -static int mlx4_ib_update_gids(struct gid_entry *gids, - struct mlx4_ib_dev *ibdev, - u8 port_num) +static int mlx4_ib_update_gids_v1(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) { struct mlx4_cmd_mailbox *mailbox; int err; @@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return err; } +static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + struct mlx4_dev *dev = ibdev->dev; + int i; + struct { + union ib_gid gid; + __be32 rsrvd1[2]; + __be16 rsrvd2; + u8 type; + u8 version; + __be32 rsrvd3; + } *gid_tbl; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + gid_tbl = mailbox->buf; + for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { + memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid)); + if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + gid_tbl[i].version = 2; + if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid)) + gid_tbl[i].type = 1; + else + memset(&gid_tbl[i].gid, 0, 12); + } + } + + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_ROCE_ADDR << 8 | port_num, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (mlx4_is_bonded(dev)) + err += mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_ROCE_ADDR << 8 | 2, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +static int mlx4_ib_update_gids(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) +{ + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) + return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num); + + return mlx4_ib_update_gids_v1(gids, ibdev, port_num); +} + static int mlx4_ib_add_gid(struct ib_device *device, u8 port_num, unsigned int index, @@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device, port_gid_table = &iboe->gids[port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { - if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) { + if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) && + (port_gid_table->gids[i].gid_type == attr->gid_type)) { found = i; break; } @@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device, } else { *context = port_gid_table->gids[free].ctx; memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); + port_gid_table->gids[free].gid_type = attr->gid_type; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; hw_update = 1; @@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device, if (!gids) { ret = -ENOMEM; } else { - for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) + for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) { memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); + gids[i].gid_type = port_gid_table->gids[i].gid_type; + } } } spin_unlock_bh(&iboe->lock); @@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, int i; int ret; unsigned long flags; + struct ib_gid_attr attr; if (port_num > MLX4_MAX_PORTS) return -EINVAL; @@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) return index; - ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid); + ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr); if (ret) return ret; + if (attr.ndev) + dev_put(attr.ndev); + if (!memcmp(&gid, &zgid, sizeof(gid))) return -EINVAL; @@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, port_gid_table = &iboe->gids[port_num - 1]; for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) - if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) { + if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) && + attr.gid_type == port_gid_table->gids[i].gid_type) { ctx = port_gid_table->gids[i].ctx; break; } @@ -442,6 +508,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; } + props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = dev->dev->persist->pdev->device; @@ -454,7 +522,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); - props->max_sge_rd = props->max_sge; + props->max_sge_rd = MLX4_MAX_SGE_RD; props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; props->max_mr = dev->dev->quotas.mpt; @@ -754,7 +822,7 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, if (!rdma_cap_roce_gid_table(ibdev, port)) return -ENODEV; - ret = ib_get_cached_gid(ibdev, port, index, gid); + ret = ib_get_cached_gid(ibdev, port, index, gid, NULL); if (ret == -EAGAIN) { memcpy(gid, &zgid, sizeof(*gid)); return 0; @@ -1247,6 +1315,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) return 0; } +static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev, + struct mlx4_ib_counters *ctr_table) +{ + struct counter_index *counter, *tmp_count; + + mutex_lock(&ctr_table->mutex); + list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list, + list) { + if (counter->allocated) + mlx4_counter_free(ibdev->dev, counter->index); + list_del(&counter->list); + kfree(counter); + } + mutex_unlock(&ctr_table->mutex); +} + int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid) { @@ -2101,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; + struct mlx4_ib_dev *mdev = to_mdev(ibdev); int err; err = mlx4_ib_query_port(ibdev, port_num, &attr); @@ -2110,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) + if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) { immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; - else - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + } else { + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + } immutable->max_mad_size = IB_MGMT_MAD_SIZE; @@ -2131,6 +2221,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) int num_req_counters; int allocated; u32 counter_index; + struct counter_index *new_counter_index = NULL; pr_info_once("%s", mlx4_ib_version); @@ -2247,8 +2338,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; - ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; - ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; + ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; @@ -2265,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; - ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw; ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; ibdev->ib_dev.uverbs_cmd_mask |= @@ -2293,7 +2382,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); mlx4_ib_alloc_eqs(dev, ibdev); @@ -2302,6 +2392,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (init_node_data(ibdev)) goto err_map; + for (i = 0; i < ibdev->num_ports; ++i) { + mutex_init(&ibdev->counters_table[i].mutex); + INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list); + } + num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports; for (i = 0; i < num_req_counters; ++i) { mutex_init(&ibdev->qp1_proxy_lock[i]); @@ -2320,15 +2415,34 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) counter_index = mlx4_get_default_counter_index(dev, i + 1); } - ibdev->counters[i].index = counter_index; - ibdev->counters[i].allocated = allocated; + new_counter_index = kmalloc(sizeof(*new_counter_index), + GFP_KERNEL); + if (!new_counter_index) { + if (allocated) + mlx4_counter_free(ibdev->dev, counter_index); + goto err_counter; + } + new_counter_index->index = counter_index; + new_counter_index->allocated = allocated; + list_add_tail(&new_counter_index->list, + &ibdev->counters_table[i].counters_list); + ibdev->counters_table[i].default_counter = counter_index; pr_info("counter index %d for port %d allocated %d\n", counter_index, i + 1, allocated); } if (mlx4_is_bonded(dev)) for (i = 1; i < ibdev->num_ports ; ++i) { - ibdev->counters[i].index = ibdev->counters[0].index; - ibdev->counters[i].allocated = 0; + new_counter_index = + kmalloc(sizeof(struct counter_index), + GFP_KERNEL); + if (!new_counter_index) + goto err_counter; + new_counter_index->index = counter_index; + new_counter_index->allocated = 0; + list_add_tail(&new_counter_index->list, + &ibdev->counters_table[i].counters_list); + ibdev->counters_table[i].default_counter = + counter_index; } mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) @@ -2380,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_init_sriov(ibdev)) goto err_mad; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE || + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { if (!iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(&iboe->nb); @@ -2389,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_notif; } } + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { + err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT); + if (err) { + goto err_notif; + } + } } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { @@ -2437,12 +2558,9 @@ err_steer_qp_release: mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); err_counter: - for (i = 0; i < ibdev->num_ports; ++i) { - if (ibdev->counters[i].index != -1 && - ibdev->counters[i].allocated) - mlx4_counter_free(ibdev->dev, - ibdev->counters[i].index); - } + for (i = 0; i < ibdev->num_ports; ++i) + mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]); + err_map: iounmap(ibdev->uar_map); @@ -2546,9 +2664,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) - if (ibdev->counters[p].index != -1 && - ibdev->counters[p].allocated) - mlx4_counter_free(ibdev->dev, ibdev->counters[p].index); + mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]); + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 2d5bccd..99451d8 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -222,7 +222,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) spin_unlock_irqrestore(&dev->sm_lock, flags); return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, - &ah_attr, NULL, mad); + &ah_attr, NULL, 0xffff, mad); } static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 1e7b23b..52ce7b0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -129,10 +129,17 @@ struct mlx4_ib_cq { struct list_head recv_qp_list; }; +#define MLX4_MR_PAGES_ALIGN 0x40 + struct mlx4_ib_mr { struct ib_mr ibmr; + __be64 *pages; + dma_addr_t page_map; + u32 npages; + u32 max_pages; struct mlx4_mr mmr; struct ib_umem *umem; + void *pages_alloc; }; struct mlx4_ib_mw { @@ -140,12 +147,6 @@ struct mlx4_ib_mw { struct mlx4_mw mmw; }; -struct mlx4_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - __be64 *mapped_page_list; - dma_addr_t map; -}; - struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; @@ -176,11 +177,18 @@ struct mlx4_ib_wq { unsigned tail; }; +enum { + MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START +}; + enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, + + /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */ + MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, }; @@ -320,6 +328,7 @@ struct mlx4_ib_qp { struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; + struct counter_index *counter_index; }; struct mlx4_ib_srq { @@ -476,6 +485,7 @@ struct gid_cache_context { struct gid_entry { union ib_gid gid; + enum ib_gid_type gid_type; struct gid_cache_context *ctx; }; @@ -528,10 +538,17 @@ struct mlx4_ib_iov_port { }; struct counter_index { + struct list_head list; u32 index; u8 allocated; }; +struct mlx4_ib_counters { + struct list_head counters_list; + struct mutex mutex; /* mutex for accessing counters list */ + u32 default_counter; +}; + struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -550,7 +567,7 @@ struct mlx4_ib_dev { struct mutex cap_mask_mutex; bool ib_active; struct mlx4_ib_iboe iboe; - struct counter_index counters[MLX4_MAX_PORTS]; + struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS]; int *eq_table; struct kobject *iov_parent; struct kobject *ports_parent; @@ -638,11 +655,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) return container_of(ibmw, struct mlx4_ib_mw, ibmw); } -static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) -{ - return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); -} - static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); @@ -700,16 +712,13 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); -int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, - struct ib_mw_bind *mw_bind); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); -struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len); -void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); - +int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, @@ -813,7 +822,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, - struct ib_mad *mad); + u16 vlan_id, struct ib_mad *mad); __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 2542fd3..242b94e 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -59,7 +59,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) struct mlx4_ib_mr *mr; int err; - mr = kmalloc(sizeof *mr, GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -140,7 +140,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int err; int n; - mr = kmalloc(sizeof *mr, GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -271,11 +271,59 @@ release_mpt_entry: return err; } +static int +mlx4_alloc_priv_pages(struct ib_device *device, + struct mlx4_ib_mr *mr, + int max_pages) +{ + int size = max_pages * sizeof(u64); + int add_size; + int ret; + + add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + + mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL); + if (!mr->pages_alloc) + return -ENOMEM; + + mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN); + + mr->page_map = dma_map_single(device->dma_device, mr->pages, + size, DMA_TO_DEVICE); + + if (dma_mapping_error(device->dma_device, mr->page_map)) { + ret = -ENOMEM; + goto err; + } + + return 0; +err: + kfree(mr->pages_alloc); + + return ret; +} + +static void +mlx4_free_priv_pages(struct mlx4_ib_mr *mr) +{ + if (mr->pages) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_pages * sizeof(u64); + + dma_unmap_single(device->dma_device, mr->page_map, + size, DMA_TO_DEVICE); + kfree(mr->pages_alloc); + mr->pages = NULL; + } +} + int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); int ret; + mlx4_free_priv_pages(mr); + ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); if (ret) return ret; @@ -318,28 +366,6 @@ err_free: return ERR_PTR(err); } -int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - struct ib_send_wr wr; - struct ib_send_wr *bad_wr; - int ret; - - memset(&wr, 0, sizeof(wr)); - wr.opcode = IB_WR_BIND_MW; - wr.wr_id = mw_bind->wr_id; - wr.send_flags = mw_bind->send_flags; - wr.wr.bind_mw.mw = mw; - wr.wr.bind_mw.bind_info = mw_bind->bind_info; - wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); - - ret = mlx4_ib_post_send(qp, &wr, &bad_wr); - if (!ret) - mw->rkey = wr.wr.bind_mw.rkey; - - return ret; -} - int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) { struct mlx4_ib_mw *mw = to_mmw(ibmw); @@ -362,7 +388,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, max_num_sg > MLX4_MAX_FAST_REG_PAGES) return ERR_PTR(-EINVAL); - mr = kmalloc(sizeof *mr, GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -371,71 +397,30 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_free; + err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg); + if (err) + goto err_free_mr; + + mr->max_pages = max_num_sg; + err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) - goto err_mr; + goto err_free_pl; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->umem = NULL; return &mr->ibmr; -err_mr: +err_free_pl: + mlx4_free_priv_pages(mr); +err_free_mr: (void) mlx4_mr_free(dev->dev, &mr->mmr); - err_free: kfree(mr); return ERR_PTR(err); } -struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len) -{ - struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct mlx4_ib_fast_reg_page_list *mfrpl; - int size = page_list_len * sizeof (u64); - - if (page_list_len > MLX4_MAX_FAST_REG_PAGES) - return ERR_PTR(-EINVAL); - - mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); - if (!mfrpl) - return ERR_PTR(-ENOMEM); - - mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); - if (!mfrpl->ibfrpl.page_list) - goto err_free; - - mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist-> - pdev->dev, - size, &mfrpl->map, - GFP_KERNEL); - if (!mfrpl->mapped_page_list) - goto err_free; - - WARN_ON(mfrpl->map & 0x3f); - - return &mfrpl->ibfrpl; - -err_free: - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); - return ERR_PTR(-ENOMEM); -} - -void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - struct mlx4_ib_dev *dev = to_mdev(page_list->device); - struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); - int size = page_list->max_page_list_len * sizeof (u64); - - dma_free_coherent(&dev->dev->persist->pdev->dev, size, - mfrpl->mapped_page_list, - mfrpl->map); - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); -} - struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { @@ -528,3 +513,37 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr) return err; } + +static int mlx4_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct mlx4_ib_mr *mr = to_mmr(ibmr); + + if (unlikely(mr->npages == mr->max_pages)) + return -ENOMEM; + + mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT); + + return 0; +} + +int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct mlx4_ib_mr *mr = to_mmr(ibmr); + int rc; + + mr->npages = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map, + sizeof(u64) * mr->max_pages, + DMA_TO_DEVICE); + + rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page); + + ib_dma_sync_single_for_device(ibmr->device, mr->page_map, + sizeof(u64) * mr->max_pages, + DMA_TO_DEVICE); + + return rc; +} diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 4ad9be3..fd97534 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -32,8 +32,11 @@ */ #include <linux/log2.h> +#include <linux/etherdevice.h> +#include <net/ip.h> #include <linux/slab.h> #include <linux/netdevice.h> +#include <linux/vmalloc.h> #include <rdma/ib_cache.h> #include <rdma/ib_pack.h> @@ -84,6 +87,7 @@ struct mlx4_ib_sqp { u32 send_psn; struct ib_ud_header ud_header; u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; + struct ib_qp *roce_v2_gsi; }; enum { @@ -111,10 +115,9 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), - [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), - [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -153,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } } } - return proxy_sqp; + if (proxy_sqp) + return 1; + + return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP); } /* used for INIT/CLOSE port logic */ @@ -617,6 +623,18 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) return 0; } +static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, + struct mlx4_ib_qp *qp) +{ + mutex_lock(&dev->counters_table[qp->port - 1].mutex); + mlx4_counter_free(dev->dev, qp->counter_index->index); + list_del(&qp->counter_index->list); + mutex_unlock(&dev->counters_table[qp->port - 1].mutex); + + kfree(qp->counter_index); + qp->counter_index = NULL; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, @@ -746,9 +764,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) - qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; @@ -786,8 +801,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp); + qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64), + gfp | __GFP_NOWARN); + if (!qp->sq.wrid) + qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), + gfp, PAGE_KERNEL); + qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64), + gfp | __GFP_NOWARN); + if (!qp->rq.wrid) + qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), + gfp, PAGE_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -822,6 +845,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_proxy; } + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) goto err_qpn; @@ -874,8 +900,8 @@ err_wrid: if (qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { - kfree(qp->sq.wrid); - kfree(qp->rq.wrid); + kvfree(qp->sq.wrid); + kvfree(qp->rq.wrid); } err_mtt: @@ -1050,8 +1076,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, &qp->db); ib_umem_release(qp->umem); } else { - kfree(qp->sq.wrid); - kfree(qp->rq.wrid); + kvfree(qp->sq.wrid); + kvfree(qp->rq.wrid); if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) free_proxy_bufs(&dev->ib_dev, qp); @@ -1080,12 +1106,13 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) return dev->dev->caps.qp1_proxy[attr->port_num - 1]; } -struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { struct mlx4_ib_qp *qp = NULL; int err; + int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; gfp_t gfp; @@ -1100,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | + MLX4_IB_QP_CREATE_ROCE_V2_GSI | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); @@ -1108,13 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - if (init_attr->create_flags && - (udata || - ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && - init_attr->qp_type != IB_QPT_UD) || - ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && - init_attr->qp_type > IB_QPT_GSI))) - return ERR_PTR(-EINVAL); + if (init_attr->create_flags) { + if (udata && init_attr->create_flags & ~(sup_u_create_flags)) + return ERR_PTR(-EINVAL); + + if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | + MLX4_IB_QP_CREATE_USE_GFP_NOIO | + MLX4_IB_QP_CREATE_ROCE_V2_GSI | + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) && + init_attr->qp_type != IB_QPT_UD) || + (init_attr->create_flags & MLX4_IB_SRIOV_SQP && + init_attr->qp_type > IB_QPT_GSI) || + (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI && + init_attr->qp_type != IB_QPT_GSI)) + return ERR_PTR(-EINVAL); + } switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: @@ -1151,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_SMI: case IB_QPT_GSI: { + int sqpn; + /* Userspace is not allowed to create special QPs: */ if (udata) return ERR_PTR(-EINVAL); + if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { + int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0); + + if (res) + return ERR_PTR(res); + } else { + sqpn = get_sqp_num(to_mdev(pd->device), init_attr); + } err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - get_sqp_num(to_mdev(pd->device), init_attr), + sqpn, &qp, gfp); if (err) return ERR_PTR(err); qp->port = init_attr->port_num; - qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; - + qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : + init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1; break; } default: @@ -1174,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return &qp->ibqp; } -int mlx4_ib_destroy_qp(struct ib_qp *qp) +struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { + struct ib_device *device = pd ? pd->device : init_attr->xrcd->device; + struct ib_qp *ibqp; + struct mlx4_ib_dev *dev = to_mdev(device); + + ibqp = _mlx4_ib_create_qp(pd, init_attr, udata); + + if (!IS_ERR(ibqp) && + (init_attr->qp_type == IB_QPT_GSI) && + !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) { + struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp))); + int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num); + + if (is_eth && + dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { + init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI; + sqp->roce_v2_gsi = ib_create_qp(pd, init_attr); + + if (IS_ERR(sqp->roce_v2_gsi)) { + pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi)); + sqp->roce_v2_gsi = NULL; + } else { + sqp = to_msqp(to_mqp(sqp->roce_v2_gsi)); + sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP; + } + + init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI; + } + } + return ibqp; +} + +static int _mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); @@ -1189,6 +1269,9 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); } + if (mqp->counter_index) + mlx4_ib_free_qp_counter(dev, mqp); + pd = get_pd(mqp); destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); @@ -1200,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) return 0; } +int mlx4_ib_destroy_qp(struct ib_qp *qp) +{ + struct mlx4_ib_qp *mqp = to_mqp(qp); + + if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { + struct mlx4_ib_sqp *sqp = to_msqp(mqp); + + if (sqp->roce_v2_gsi) + ib_destroy_qp(sqp->roce_v2_gsi); + } + + return _mlx4_ib_destroy_qp(qp); +} + static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) { switch (type) { @@ -1391,11 +1488,12 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, struct mlx4_ib_qp *mqp, - struct mlx4_qp_path *path, u8 port) + struct mlx4_qp_path *path, u8 port, + u16 vlan_id, u8 *smac) { return _mlx4_set_path(dev, &qp->ah_attr, - mlx4_mac_to_u64((u8 *)qp->smac), - (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff, + mlx4_mac_to_u64(smac), + vlan_id, path, &mqp->pri, port); } @@ -1406,9 +1504,8 @@ static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, struct mlx4_qp_path *path, u8 port) { return _mlx4_set_path(dev, &qp->alt_ah_attr, - mlx4_mac_to_u64((u8 *)qp->alt_smac), - (qp_attr_mask & IB_QP_ALT_VID) ? - qp->alt_vlan_id : 0xffff, + 0, + 0xffff, path, &mqp->alt, port); } @@ -1424,7 +1521,8 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } } -static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac, +static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, + struct mlx4_ib_qp *qp, struct mlx4_qp_context *context) { u64 u64_mac; @@ -1447,6 +1545,58 @@ static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp * return 0; } +static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + struct counter_index *new_counter_index; + int err; + u32 tmp_idx; + + if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) != + IB_LINK_LAYER_ETHERNET || + !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) || + !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK)) + return 0; + + err = mlx4_counter_alloc(dev->dev, &tmp_idx); + if (err) + return err; + + new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL); + if (!new_counter_index) { + mlx4_counter_free(dev->dev, tmp_idx); + return -ENOMEM; + } + + new_counter_index->index = tmp_idx; + new_counter_index->allocated = 1; + qp->counter_index = new_counter_index; + + mutex_lock(&dev->counters_table[qp->port - 1].mutex); + list_add_tail(&new_counter_index->list, + &dev->counters_table[qp->port - 1].counters_list); + mutex_unlock(&dev->counters_table[qp->port - 1].mutex); + + return 0; +} + +enum { + MLX4_QPC_ROCE_MODE_1 = 0, + MLX4_QPC_ROCE_MODE_2 = 2, + MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff +}; + +static u8 gid_type_to_qpc(enum ib_gid_type gid_type) +{ + switch (gid_type) { + case IB_GID_TYPE_ROCE: + return MLX4_QPC_ROCE_MODE_1; + case IB_GID_TYPE_ROCE_UDP_ENCAP: + return MLX4_QPC_ROCE_MODE_2; + default: + return MLX4_QPC_ROCE_MODE_UNDEFINED; + } +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -1460,6 +1610,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int sqd_event; int steer_qp = 0; int err = -EINVAL; + int counter_index; /* APM is not supported under RoCE */ if (attr_mask & IB_QP_ALT_PATH && @@ -1519,6 +1670,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; + if (new_state == IB_QPS_RESET && qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); @@ -1527,9 +1681,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (qp->ibqp.uobject) - context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); + context->usr_page = cpu_to_be32( + mlx4_to_hw_uar_index(dev->dev, + to_mucontext(ibqp->uobject->context)->uar.index)); else - context->usr_page = cpu_to_be32(dev->priv_uar.index); + context->usr_page = cpu_to_be32( + mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index)); if (attr_mask & IB_QP_DEST_QPN) context->remote_qpn = cpu_to_be32(attr->dest_qp_num); @@ -1543,10 +1700,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { - if (dev->counters[qp->port - 1].index != -1) { - context->pri_path.counter_index = - dev->counters[qp->port - 1].index; + err = create_qp_lb_counter(dev, qp); + if (err) + goto out; + + counter_index = + dev->counters_table[qp->port - 1].default_counter; + if (qp->counter_index) + counter_index = qp->counter_index->index; + + if (counter_index != -1) { + context->pri_path.counter_index = counter_index; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; + if (qp->counter_index) { + context->pri_path.fl |= + MLX4_FL_ETH_SRC_CHECK_MC_LB; + context->pri_path.vlan_control |= + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; + } } else context->pri_path.counter_index = MLX4_SINK_COUNTER_INDEX(dev->dev); @@ -1555,6 +1726,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, mlx4_ib_steer_qp_reg(dev, qp, 1); steer_qp = 1; } + + if (ibqp->qp_type == IB_QPT_GSI) { + enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ? + IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE; + u8 qpc_roce_mode = gid_type_to_qpc(gid_type); + + context->rlkey_roce_mode |= (qpc_roce_mode << 6); + } } if (attr_mask & IB_QP_PKEY_INDEX) { @@ -1565,13 +1744,50 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { + u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : + attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + union ib_gid gid; + struct ib_gid_attr gid_attr; + u16 vlan = 0xffff; + u8 smac[ETH_ALEN]; + int status = 0; + int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && + attr->ah_attr.ah_flags & IB_AH_GRH; + + if (is_eth) { + int index = attr->ah_attr.grh.sgid_index; + + status = ib_get_cached_gid(ibqp->device, port_num, + index, &gid, &gid_attr); + if (!status && !memcmp(&gid, &zgid, sizeof(gid))) + status = -ENOENT; + if (!status && gid_attr.ndev) { + vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); + memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(gid_attr.ndev); + } + } + if (status) + goto out; + if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, - attr_mask & IB_QP_PORT ? - attr->port_num : qp->port)) + port_num, vlan, smac)) goto out; optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX4_QP_OPTPAR_SCHED_QUEUE); + + if (is_eth && + (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { + u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); + + if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { + err = -EINVAL; + goto out; + } + context->rlkey_roce_mode |= (qpc_roce_mode << 6); + } + } if (attr_mask & IB_QP_TIMEOUT) { @@ -1704,7 +1920,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { - err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); + err = handle_eth_ud_smac_index(dev, qp, context); if (err) { err = -EINVAL; goto out; @@ -1743,7 +1959,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, sqd_event = 0; if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) - context->rlkey |= (1 << 4); + context->rlkey_roce_mode |= (1 << 4); /* * Before passing a kernel QP to the HW, make sure that the @@ -1848,6 +2064,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } out: + if (err && qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); if (err && steer_qp) mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); @@ -1918,8 +2136,8 @@ out: return err; } -int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) +static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); @@ -2022,6 +2240,27 @@ out: return err; } +int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + int ret; + + ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); + + if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { + struct mlx4_ib_sqp *sqp = to_msqp(mqp); + int err = 0; + + if (sqp->roce_v2_gsi) + err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask); + if (err) + pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n", + err); + } + return ret; +} + static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) { int i; @@ -2036,14 +2275,14 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) } static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, - struct ib_send_wr *wr, + struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); struct ib_device *ib_dev = &mdev->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); u16 pkey; u32 qkey; int send_size; @@ -2051,20 +2290,20 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, int spc; int i; - if (wr->opcode != IB_WR_SEND) + if (wr->wr.opcode != IB_WR_SEND) return -EINVAL; send_size = 0; - for (i = 0; i < wr->num_sge; ++i) - send_size += wr->sg_list[i].length; + for (i = 0; i < wr->wr.num_sge; ++i) + send_size += wr->wr.sg_list[i].length; /* for proxy-qp0 sends, need to add in size of tunnel header */ /* for tunnel-qp0 sends, tunnel header is already in s/g list */ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) send_size += sizeof (struct mlx4_ib_tunnel_header); - ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header); + ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { sqp->ud_header.lrh.service_level = @@ -2082,11 +2321,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, mlx->rlid = sqp->ud_header.lrh.destination_lid; sqp->ud_header.lrh.virtual_lane = 0; - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); else sqp->ud_header.bth.destination_qpn = cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); @@ -2148,24 +2387,15 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, return 0; } -static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac) -{ - int i; - - for (i = ETH_ALEN; i; i--) { - dst_mac[i - 1] = src_mac & 0xff; - src_mac >>= 8; - } -} - -static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, +#define MLX4_ROCEV2_QP1_SPORT 0xC000 +static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); union ib_gid sgid; u16 pkey; int send_size; @@ -2177,14 +2407,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, bool is_eth; bool is_vlan = false; bool is_grh; + bool is_udp = false; + int ip_version = 0; send_size = 0; - for (i = 0; i < wr->num_sge; ++i) - send_size += wr->sg_list[i].length; + for (i = 0; i < wr->wr.num_sge; ++i) + send_size += wr->wr.sg_list[i].length; is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { + struct ib_gid_attr gid_attr; + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so @@ -2197,17 +2431,36 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } else { err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, &sgid); - if (err) + ah->av.ib.gid_index, &sgid, + &gid_attr); + if (!err) { + if (gid_attr.ndev) + dev_put(gid_attr.ndev); + if (!memcmp(&sgid, &zgid, sizeof(sgid))) + err = -ENOENT; + } + if (!err) { + is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; + if (is_udp) { + if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) + ip_version = 4; + else + ip_version = 6; + is_grh = false; + } + } else { return err; + } } - if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; is_vlan = 1; } } - ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); + err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, + ip_version, is_udp, 0, &sqp->ud_header); + if (err) + return err; if (!is_eth) { sqp->ud_header.lrh.service_level = @@ -2216,7 +2469,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); } - if (is_grh) { + if (is_grh || (ip_version == 6)) { sqp->ud_header.grh.traffic_class = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = @@ -2239,12 +2492,31 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid); + &sqp->ud_header.grh.source_gid, NULL); } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); } + if (ip_version == 4) { + sqp->ud_header.ip4.tos = + (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; + sqp->ud_header.ip4.id = 0; + sqp->ud_header.ip4.frag_off = htons(IP_DF); + sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit; + + memcpy(&sqp->ud_header.ip4.saddr, + sgid.raw + 12, 4); + memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4); + sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header); + } + + if (is_udp) { + sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT); + sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT); + sqp->ud_header.udp.csum = 0; + } + mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); if (!is_eth) { @@ -2257,7 +2529,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, mlx->rlid = sqp->ud_header.lrh.destination_lid; } - switch (wr->opcode) { + switch (wr->wr.opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -2265,7 +2537,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->ex.imm_data; + sqp->ud_header.immediate_data = wr->wr.ex.imm_data; break; default: return -EINVAL; @@ -2273,34 +2545,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, if (is_eth) { struct in6_addr in6; - + u16 ether_type; u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; + ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE : + (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6); + mlx->sched_prio = cpu_to_be16(pcp); + ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac); memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); - /* FIXME: cache smac value? */ memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); memcpy(&in6, sgid.raw, sizeof(in6)); - if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]); - u8 smac[ETH_ALEN]; - - mlx4_u64_to_smac(smac, mac); - memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN); - } else { - /* use the src mac of the tunnel */ - memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN); - } if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); if (!is_vlan) { - sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + sqp->ud_header.eth.type = cpu_to_be16(ether_type); } else { - sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + sqp->ud_header.vlan.type = cpu_to_be16(ether_type); sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { @@ -2308,16 +2573,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else - ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey); + ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? - sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? + sqp->qkey : wr->remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); @@ -2405,45 +2670,22 @@ static __be32 convert_access(int acc) cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } -static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, + struct ib_reg_wr *wr) { - struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); - int i; + struct mlx4_ib_mr *mr = to_mmr(wr->mr); - for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i) - mfrpl->mapped_page_list[i] = - cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] | - MLX4_MTT_FLAG_PRESENT); - - fseg->flags = convert_access(wr->wr.fast_reg.access_flags); - fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); - fseg->buf_list = cpu_to_be64(mfrpl->map); - fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); - fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->flags = convert_access(wr->access); + fseg->mem_key = cpu_to_be32(wr->key); + fseg->buf_list = cpu_to_be64(mr->page_map); + fseg->start_addr = cpu_to_be64(mr->ibmr.iova); + fseg->reg_len = cpu_to_be64(mr->ibmr.length); fseg->offset = 0; /* XXX -- is this just for ZBVA? */ - fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->page_size = cpu_to_be32(ilog2(mr->ibmr.page_size)); fseg->reserved[0] = 0; fseg->reserved[1] = 0; } -static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) -{ - bseg->flags1 = - convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & - cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | - MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | - MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); - bseg->flags2 = 0; - if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) - bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); - if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) - bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); - bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); - bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); - bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); - bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); -} - static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { memset(iseg, 0, sizeof(*iseg)); @@ -2458,46 +2700,47 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, + struct ib_atomic_wr *wr) { - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); + if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->compare = cpu_to_be64(wr->compare_add); + } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->compare_add); + aseg->compare = cpu_to_be64(wr->compare_add_mask); } else { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->swap_add = cpu_to_be64(wr->compare_add); aseg->compare = 0; } } static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, - struct ib_send_wr *wr) + struct ib_atomic_wr *wr) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); + aseg->compare = cpu_to_be64(wr->compare_add); + aseg->compare_mask = cpu_to_be64(wr->compare_add_mask); } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); - dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); - dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; - memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); + memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); + dseg->dqpn = cpu_to_be32(wr->remote_qpn); + dseg->qkey = cpu_to_be32(wr->remote_qkey); + dseg->vlan = to_mah(wr->ah)->av.eth.vlan; + memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6); } static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, + struct ib_ud_wr *wr, enum mlx4_ib_qp_type qpt) { - union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; + union mlx4_ext_av *av = &to_mah(wr->ah)->av; struct mlx4_av sqp_av = {0}; int port = *((u8 *) &av->ib.port_pd) & 0x3; @@ -2516,18 +2759,18 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } -static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) +static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_wqe_inline_seg *inl = wqe; struct mlx4_ib_tunnel_header hdr; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); int spc; int i; memcpy(&hdr.av, &ah->av, sizeof hdr.av); - hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); - hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); - hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + hdr.remote_qpn = cpu_to_be32(wr->remote_qpn); + hdr.pkey_index = cpu_to_be16(wr->pkey_index); + hdr.qkey = cpu_to_be32(wr->remote_qkey); memcpy(hdr.mac, ah->av.eth.mac, 6); hdr.vlan = ah->av.eth.vlan; @@ -2599,22 +2842,22 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) { - unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); + unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) *blh = cpu_to_be32(1 << 6); if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && - wr->num_sge > qp->sq.max_gs - (halign >> 4))) + wr->wr.num_sge > qp->sq.max_gs - (halign >> 4))) return -EINVAL; - memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); + memcpy(wqe->header, wr->header, wr->hlen); - *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen); + *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen); *lso_seg_len = halign; return 0; } @@ -2662,6 +2905,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int i; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { + struct mlx4_ib_sqp *sqp = to_msqp(qp); + + if (sqp->roce_v2_gsi) { + struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah); + struct ib_gid_attr gid_attr; + union ib_gid gid; + + if (!ib_get_cached_gid(ibqp->device, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &gid, + &gid_attr)) { + if (gid_attr.ndev) + dev_put(gid_attr.ndev); + qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? + to_mqp(sqp->roce_v2_gsi) : qp; + } else { + pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n", + ah->av.ib.gid_index); + } + } + } + spin_lock_irqsave(&qp->sq.lock, flags); if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { err = -EIO; @@ -2713,11 +2979,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mlx4_wqe_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + @@ -2726,11 +2992,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - set_masked_atomic_seg(wqe, wr); + set_masked_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + @@ -2741,8 +3007,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; @@ -2755,21 +3021,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); - set_fmr_seg(wqe, wr); - wqe += sizeof (struct mlx4_wqe_fmr_seg); - size += sizeof (struct mlx4_wqe_fmr_seg) / 16; + set_reg_seg(wqe, reg_wr(wr)); + wqe += sizeof(struct mlx4_wqe_fmr_seg); + size += sizeof(struct mlx4_wqe_fmr_seg) / 16; break; - case IB_WR_BIND_MW: - ctrl->srcrb_flags |= - cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); - set_bind_seg(wqe, wr); - wqe += sizeof(struct mlx4_wqe_bind_seg); - size += sizeof(struct mlx4_wqe_bind_seg) / 16; - break; default: /* No extra segments required for sends */ break; @@ -2777,7 +3036,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_TUN_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), + ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2788,19 +3048,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case MLX4_IB_QPT_TUN_SMI: case MLX4_IB_QPT_TUN_GSI: /* this is a UD qp used in MAD responses to slaves. */ - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, ud_wr(wr)); /* set the forced-loopback bit in the data seg av */ *(__be32 *) wqe |= cpu_to_be32(0x80000000); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; break; case MLX4_IB_QPT_UD: - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { - err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); + err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen, + &lso_hdr_sz, &blh); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2812,7 +3073,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), + ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2823,7 +3085,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, add_zero_len_inline(wqe); wqe += 16; size++; - build_tunnel_header(wr, wqe, &seglen); + build_tunnel_header(ud_wr(wr), wqe, &seglen); wqe += seglen; size += seglen / 16; break; @@ -2833,18 +3095,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ - set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, + ud_wr(wr), qp->mlx4_ib_qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; - build_tunnel_header(wr, wqe, &seglen); + build_tunnel_header(ud_wr(wr), wqe, &seglen); wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index dce5dfe..0597f3e 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -34,6 +34,7 @@ #include <linux/mlx4/qp.h> #include <linux/mlx4/srq.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include "mlx4_ib.h" #include "user.h" @@ -170,10 +171,15 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; - srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL); + srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64), + GFP_KERNEL | __GFP_NOWARN); if (!srq->wrid) { - err = -ENOMEM; - goto err_mtt; + srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64), + GFP_KERNEL, PAGE_KERNEL); + if (!srq->wrid) { + err = -ENOMEM; + goto err_mtt; + } } } @@ -204,7 +210,7 @@ err_wrid: if (pd->uobject) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); else - kfree(srq->wrid); + kvfree(srq->wrid); err_mtt: mlx4_mtt_cleanup(dev->dev, &srq->mtt); @@ -281,7 +287,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq) mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); ib_umem_release(msrq->umem); } else { - kfree(msrq->wrid); + kvfree(msrq->wrid); mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, &msrq->buf); mlx4_db_free(dev->dev, &msrq->db); diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 6608058..745efa4 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -32,8 +32,10 @@ #include "mlx5_ib.h" -struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, - struct mlx5_ib_ah *ah) +static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, + struct mlx5_ib_ah *ah, + struct ib_ah_attr *ah_attr, + enum rdma_link_layer ll) { if (ah_attr->ah_flags & IB_AH_GRH) { memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16); @@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, ah->av.tclass = ah_attr->grh.traffic_class; } - ah->av.rlid = cpu_to_be16(ah_attr->dlid); - ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; - ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf); + ah->av.stat_rate_sl = (ah_attr->static_rate << 4); + + if (ll == IB_LINK_LAYER_ETHERNET) { + memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac)); + ah->av.udp_sport = + mlx5_get_roce_udp_sport(dev, + ah_attr->port_num, + ah_attr->grh.sgid_index); + ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1; + } else { + ah->av.rlid = cpu_to_be16(ah_attr->dlid); + ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; + ah->av.stat_rate_sl |= (ah_attr->sl & 0xf); + } return &ah->ibah; } @@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct mlx5_ib_ah *ah; + struct mlx5_ib_dev *dev = to_mdev(pd->device); + enum rdma_link_layer ll; + + ll = pd->device->get_link_layer(pd->device, ah_attr->port_num); + + if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH)) + return ERR_PTR(-EINVAL); ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); - return create_ib_ah(ah_attr, ah); /* never fails */ + return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */ } int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2d0dbbf..fd1de31 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -109,8 +109,8 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; - case IB_WR_FAST_REG_MR: - return IB_WC_FAST_REG_MR; + case IB_WR_REG_MR: + return IB_WC_REG_MR; default: pr_warn("unknown completion status\n"); @@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->opcode = IB_WC_MASKED_FETCH_ADD; wc->byte_len = 8; break; - case MLX5_OPCODE_BIND_MW: - wc->opcode = IB_WC_BIND_MW; - break; case MLX5_OPCODE_UMR: wc->opcode = get_umr_comp(wq, idx); break; @@ -171,6 +168,7 @@ enum { static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_ib_qp *qp) { + enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); struct mlx5_ib_srq *srq; struct mlx5_ib_wq *wq; @@ -236,6 +234,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } else { wc->pkey_index = 0; } + + if (ll != IB_LINK_LAYER_ETHERNET) + return; + + switch (wc->sl & 0x3) { + case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: + wc->network_hdr_type = RDMA_NETWORK_IB; + break; + case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: + wc->network_hdr_type = RDMA_NETWORK_IPV6; + break; + case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4: + wc->network_hdr_type = RDMA_NETWORK_IPV4; + break; + } + wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; } static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) @@ -756,16 +770,16 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int uninitialized_var(index); int uninitialized_var(inlen); int cqe_size; - int irqn; + unsigned int irqn; int eqn; int err; - if (attr->flags) - return ERR_PTR(-EINVAL); - if (entries < 0) return ERR_PTR(-EINVAL); + if (check_cq_create_flags(attr->flags)) + return ERR_PTR(-EOPNOTSUPP); + entries = roundup_pow_of_two(entries + 1); if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) return ERR_PTR(-EINVAL); @@ -779,6 +793,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + cq->create_flags = attr->flags; if (context) { err = create_cq_user(dev, udata, context, cq, entries, @@ -796,6 +811,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->cqe_size = cqe_size; cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; + + if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) + cqb->ctx.cqe_sz_flags |= (1 << 1); + cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); if (err) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 68508d5..03c418c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -40,9 +40,14 @@ #include <linux/io-mapping.h> #include <linux/sched.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> #include <linux/mlx5/vport.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> +#include <linux/in.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/fs.h> #include "user.h" #include "mlx5_ib.h" @@ -63,12 +68,14 @@ static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; +enum { + MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, +}; + static enum rdma_link_layer -mlx5_ib_port_link_layer(struct ib_device *device) +mlx5_port_type_cap_to_rdma_ll(int port_type_cap) { - struct mlx5_ib_dev *dev = to_mdev(device); - - switch (MLX5_CAP_GEN(dev->mdev, port_type)) { + switch (port_type_cap) { case MLX5_CAP_PORT_TYPE_IB: return IB_LINK_LAYER_INFINIBAND; case MLX5_CAP_PORT_TYPE_ETH: @@ -78,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device) } } +static enum rdma_link_layer +mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); + + return mlx5_port_type_cap_to_rdma_ll(port_type_cap); +} + +static int mlx5_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, + roce.nb); + + if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER)) + return NOTIFY_DONE; + + write_lock(&ibdev->roce.netdev_lock); + if (ndev->dev.parent == &ibdev->mdev->pdev->dev) + ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; + write_unlock(&ibdev->roce.netdev_lock); + + return NOTIFY_DONE; +} + +static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, + u8 port_num) +{ + struct mlx5_ib_dev *ibdev = to_mdev(device); + struct net_device *ndev; + + /* Ensure ndev does not disappear before we invoke dev_hold() + */ + read_lock(&ibdev->roce.netdev_lock); + ndev = ibdev->roce.netdev; + if (ndev) + dev_hold(ndev); + read_unlock(&ibdev->roce.netdev_lock); + + return ndev; +} + +static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, + struct ib_port_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct net_device *ndev; + enum ib_mtu ndev_ib_mtu; + u16 qkey_viol_cntr; + + memset(props, 0, sizeof(*props)); + + props->port_cap_flags |= IB_PORT_CM_SUP; + props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; + + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + props->max_mtu = IB_MTU_4096; + props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); + props->pkey_tbl_len = 1; + props->state = IB_PORT_DOWN; + props->phys_state = 3; + + mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr); + props->qkey_viol_cntr = qkey_viol_cntr; + + ndev = mlx5_ib_get_netdev(device, port_num); + if (!ndev) + return 0; + + if (netif_running(ndev) && netif_carrier_ok(ndev)) { + props->state = IB_PORT_ACTIVE; + props->phys_state = 5; + } + + ndev_ib_mtu = iboe_get_mtu(ndev->mtu); + + dev_put(ndev); + + props->active_mtu = min(props->max_mtu, ndev_ib_mtu); + + props->active_width = IB_WIDTH_4X; /* TODO */ + props->active_speed = IB_SPEED_QDR; /* TODO */ + + return 0; +} + +static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid, + const struct ib_gid_attr *attr, + void *mlx5_addr) +{ +#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) + char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, + source_l3_address); + void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, + source_mac_47_32); + + if (!gid) + return; + + ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr); + + if (is_vlan_dev(attr->ndev)) { + MLX5_SET_RA(mlx5_addr, vlan_valid, 1); + MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev)); + } + + switch (attr->gid_type) { + case IB_GID_TYPE_IB: + MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1); + break; + case IB_GID_TYPE_ROCE_UDP_ENCAP: + MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2); + break; + + default: + WARN_ON(true); + } + + if (attr->gid_type != IB_GID_TYPE_IB) { + if (ipv6_addr_v4mapped((void *)gid)) + MLX5_SET_RA(mlx5_addr, roce_l3_type, + MLX5_ROCE_L3_TYPE_IPV4); + else + MLX5_SET_RA(mlx5_addr, roce_l3_type, + MLX5_ROCE_L3_TYPE_IPV6); + } + + if ((attr->gid_type == IB_GID_TYPE_IB) || + !ipv6_addr_v4mapped((void *)gid)) + memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid)); + else + memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4); +} + +static int set_roce_addr(struct ib_device *device, u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)]; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)]; + void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); + enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); + + if (ll != IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + memset(in, 0, sizeof(in)); + + ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); + + MLX5_SET(set_roce_address_in, in, roce_address_index, index); + MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, + unsigned int index, const union ib_gid *gid, + const struct ib_gid_attr *attr, + __always_unused void **context) +{ + return set_roce_addr(device, port_num, index, gid, attr); +} + +static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, + unsigned int index, __always_unused void **context) +{ + return set_roce_addr(device, port_num, index, NULL, NULL); +} + +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, + int index) +{ + struct ib_gid_attr attr; + union ib_gid gid; + + if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) + return 0; + + if (!attr.ndev) + return 0; + + dev_put(attr.ndev); + + if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + return 0; + + return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); +} + static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { return !dev->mdev->issi; @@ -94,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev) if (mlx5_use_mad_ifc(to_mdev(ibdev))) return MLX5_VPORT_ACCESS_METHOD_MAD; - if (mlx5_ib_port_link_layer(ibdev) == + if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) return MLX5_VPORT_ACCESS_METHOD_NIC; return MLX5_VPORT_ACCESS_METHOD_HCA; } +static void get_atomic_caps(struct mlx5_ib_dev *dev, + struct ib_device_attr *props) +{ + u8 tmp; + u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); + u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); + u8 atomic_req_8B_endianness_mode = + MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode); + + /* Check if HW supports 8 bytes standard atomic operations and capable + * of host endianness respond + */ + tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD; + if (((atomic_operations & tmp) == tmp) && + (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) && + (atomic_req_8B_endianness_mode)) { + props->atomic_cap = IB_ATOMIC_HCA; + } else { + props->atomic_cap = IB_ATOMIC_NONE; + } +} + static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { @@ -116,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev, case MLX5_VPORT_ACCESS_METHOD_HCA: err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); - if (!err) - *sys_image_guid = cpu_to_be64(tmp); - return err; + break; + + case MLX5_VPORT_ACCESS_METHOD_NIC: + err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + break; default: return -EINVAL; } + + if (!err) + *sys_image_guid = cpu_to_be64(tmp); + + return err; + } static int mlx5_query_max_pkeys(struct ib_device *ibdev, @@ -176,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, case MLX5_VPORT_ACCESS_METHOD_HCA: err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); - if (!err) - *node_guid = cpu_to_be64(tmp); - return err; + break; + + case MLX5_VPORT_ACCESS_METHOD_NIC: + err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp); + break; default: return -EINVAL; } + + if (!err) + *node_guid = cpu_to_be64(tmp); + + return err; } struct mlx5_reg_node_desc { @@ -260,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + (MLX5_CAP_ETH(dev->mdev, csum_cap))) + props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; @@ -275,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = props->max_sge; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); - props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1; + props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); @@ -286,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->atomic_cap = IB_ATOMIC_NONE; + get_atomic_caps(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ + props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); + props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(mdev, pg)) @@ -300,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->odp_caps = dev->odp_caps; #endif + if (MLX5_CAP_GEN(mdev, cd)) + props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; + return 0; } @@ -480,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, case MLX5_VPORT_ACCESS_METHOD_HCA: return mlx5_query_hca_port(ibdev, port, props); + case MLX5_VPORT_ACCESS_METHOD_NIC: + return mlx5_query_port_roce(ibdev, port, props); + default: return -EINVAL; } @@ -580,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_alloc_ucontext_req_v2 req; - struct mlx5_ib_alloc_ucontext_resp resp; + struct mlx5_ib_alloc_ucontext_req_v2 req = {}; + struct mlx5_ib_alloc_ucontext_resp resp = {}; struct mlx5_ib_ucontext *context; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; @@ -592,24 +844,28 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int err; int i; size_t reqlen; + size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, + max_cqe_version); if (!dev->ib_active) return ERR_PTR(-EAGAIN); - memset(&req, 0, sizeof(req)); + if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) + return ERR_PTR(-EINVAL); + reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; - else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) + else if (reqlen >= min_req_v2) ver = 2; else return ERR_PTR(-EINVAL); - err = ib_copy_from_udata(&req, udata, reqlen); + err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); if (err) return ERR_PTR(err); - if (req.flags || req.reserved) + if (req.flags) return ERR_PTR(-EINVAL); if (req.total_num_uuars > MLX5_MAX_UUARS) @@ -618,6 +874,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (req.total_num_uuars == 0) return ERR_PTR(-EINVAL); + if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) + return ERR_PTR(-EOPNOTSUPP); + + if (reqlen > sizeof(req) && + !ib_is_udata_cleared(udata, sizeof(req), + reqlen - sizeof(req))) + return ERR_PTR(-EOPNOTSUPP); + req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_NON_FP_BF_REGS_PER_PAGE); if (req.num_low_latency_uuars > req.total_num_uuars - 1) @@ -633,6 +897,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + resp.cqe_version = min_t(__u8, + (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), + req.max_cqe_version); + resp.response_length = min(offsetof(typeof(resp), response_length) + + sizeof(resp.response_length), udata->outlen); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -678,22 +947,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif + if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { + err = mlx5_core_alloc_transport_domain(dev->mdev, + &context->tdn); + if (err) + goto out_uars; + } + INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); - err = ib_copy_to_udata(udata, &resp, - sizeof(resp) - sizeof(resp.reserved)); + + if (field_avail(typeof(resp), cqe_version, udata->outlen)) + resp.response_length += sizeof(resp.cqe_version); + + if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { + resp.comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; + resp.hca_core_clock_offset = + offsetof(struct mlx5_init_seg, internal_timer_h) % + PAGE_SIZE; + resp.response_length += sizeof(resp.hca_core_clock_offset) + + sizeof(resp.reserved2) + + sizeof(resp.reserved3); + } + + err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - goto out_uars; + goto out_td; uuari->ver = ver; uuari->num_low_latency_uuars = req.num_low_latency_uuars; uuari->uars = uars; uuari->num_uars = num_uars; + context->cqe_version = resp.cqe_version; + return &context->ibucontext; +out_td: + if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + out_uars: for (i--; i >= 0; i--) mlx5_cmd_free_uar(dev->mdev, uars[i].index); @@ -718,6 +1014,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_uuar_info *uuari = &context->uuari; int i; + if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + for (i = 0; i < uuari->num_uars; i++) { if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index)) mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); @@ -787,6 +1086,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: return -ENOSYS; + case MLX5_IB_MMAP_CORE_CLOCK: + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) + return -EPERM; + + /* Don't expose to user-space information it shouldn't have */ + if (PAGE_SIZE > 4096) + return -EOPNOTSUPP; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = (dev->mdev->iseg_base + + offsetof(struct mlx5_init_seg, internal_timer_h)) >> + PAGE_SHIFT; + if (io_remap_pfn_range(vma, vma->vm_start, pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n", + vma->vm_start, + (unsigned long long)pfn << PAGE_SHIFT); + break; + default: return -EINVAL; } @@ -835,6 +1158,457 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_ST_SZ_BYTES(fte_match_param); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static int parse_flow_attr(u32 *match_c, u32 *match_v, + union ib_flow_spec *ib_spec) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + if (ib_spec->size != sizeof(ib_spec->eth)) + return -EINVAL; + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dmac_47_16), + ib_spec->eth.mask.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dmac_47_16), + ib_spec->eth.val.dst_mac); + + if (ib_spec->eth.mask.vlan_tag) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_vid, ntohs(ib_spec->eth.val.vlan_tag)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_cfi, + ntohs(ib_spec->eth.mask.vlan_tag) >> 12); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_cfi, + ntohs(ib_spec->eth.val.vlan_tag) >> 12); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_prio, + ntohs(ib_spec->eth.mask.vlan_tag) >> 13); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_prio, + ntohs(ib_spec->eth.val.vlan_tag) >> 13); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, ntohs(ib_spec->eth.mask.ether_type)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ntohs(ib_spec->eth.val.ether_type)); + break; + case IB_FLOW_SPEC_IPV4: + if (ib_spec->size != sizeof(ib_spec->ipv4)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IP); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.src_ip, + sizeof(ib_spec->ipv4.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.src_ip, + sizeof(ib_spec->ipv4.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.dst_ip, + sizeof(ib_spec->ipv4.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.dst_ip, + sizeof(ib_spec->ipv4.val.dst_ip)); + break; + case IB_FLOW_SPEC_TCP: + if (ib_spec->size != sizeof(ib_spec->tcp_udp)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_TCP); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_UDP: + if (ib_spec->size != sizeof(ib_spec->tcp_udp)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + default: + return -EINVAL; + } + + return 0; +} + +/* If a flow could catch both multicast and unicast packets, + * it won't fall into the multicast flow steering table and this rule + * could steal other multicast packets. + */ +static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) +{ + struct ib_flow_spec_eth *eth_spec; + + if (ib_attr->type != IB_FLOW_ATTR_NORMAL || + ib_attr->size < sizeof(struct ib_flow_attr) + + sizeof(struct ib_flow_spec_eth) || + ib_attr->num_of_specs < 1) + return false; + + eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); + if (eth_spec->type != IB_FLOW_SPEC_ETH || + eth_spec->size != sizeof(*eth_spec)) + return false; + + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); +} + +static bool is_valid_attr(struct ib_flow_attr *flow_attr) +{ + union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); + bool has_ipv4_spec = false; + bool eth_type_ipv4 = true; + unsigned int spec_index; + + /* Validate that ethertype is correct */ + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + if (ib_spec->type == IB_FLOW_SPEC_ETH && + ib_spec->eth.mask.ether_type) { + if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) && + ib_spec->eth.val.ether_type == htons(ETH_P_IP))) + eth_type_ipv4 = false; + } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) { + has_ipv4_spec = true; + } + ib_spec = (void *)ib_spec + ib_spec->size; + } + return !has_ipv4_spec || eth_type_ipv4; +} + +static void put_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *prio, bool ft_added) +{ + prio->refcount -= !!ft_added; + if (!prio->refcount) { + mlx5_destroy_flow_table(prio->flow_table); + prio->flow_table = NULL; + } +} + +static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) +{ + struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); + struct mlx5_ib_flow_handler *handler = container_of(flow_id, + struct mlx5_ib_flow_handler, + ibflow); + struct mlx5_ib_flow_handler *iter, *tmp; + + mutex_lock(&dev->flow_db.lock); + + list_for_each_entry_safe(iter, tmp, &handler->list, list) { + mlx5_del_flow_rule(iter->rule); + list_del(&iter->list); + kfree(iter); + } + + mlx5_del_flow_rule(handler->rule); + put_flow_table(dev, &dev->flow_db.prios[handler->prio], true); + mutex_unlock(&dev->flow_db.lock); + + kfree(handler); + + return 0; +} + +#define MLX5_FS_MAX_TYPES 10 +#define MLX5_FS_MAX_ENTRIES 32000UL +static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, + struct ib_flow_attr *flow_attr) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_table *ft; + int num_entries; + int num_groups; + int priority; + int err = 0; + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + if (flow_is_multicast_only(flow_attr)) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = flow_attr->priority; + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_BYPASS); + num_entries = MLX5_FS_MAX_ENTRIES; + num_groups = MLX5_FS_MAX_TYPES; + prio = &dev->flow_db.prios[priority]; + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_LEFTOVERS); + build_leftovers_ft_param(&priority, + &num_entries, + &num_groups); + prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } + + if (!ns) + return ERR_PTR(-ENOTSUPP); + + ft = prio->flow_table; + if (!ft) { + ft = mlx5_create_auto_grouped_flow_table(ns, priority, + num_entries, + num_groups); + + if (!IS_ERR(ft)) { + prio->refcount = 0; + prio->flow_table = ft; + } else { + err = PTR_ERR(ft); + } + } + + return err ? ERR_PTR(err) : prio; +} + +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_flow_table *ft = ft_prio->flow_table; + struct mlx5_ib_flow_handler *handler; + void *ib_flow = flow_attr + 1; + u8 match_criteria_enable = 0; + unsigned int spec_index; + u32 *match_c; + u32 *match_v; + int err = 0; + + if (!is_valid_attr(flow_attr)) + return ERR_PTR(-EINVAL); + + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !match_c || !match_v) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + err = parse_flow_attr(match_c, match_v, ib_flow); + if (err < 0) + goto free; + + ib_flow += ((union ib_flow_spec *)ib_flow)->size; + } + + /* Outer header support only */ + match_criteria_enable = (!outer_header_zero(match_c)) << 0; + handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, + match_c, match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + dst); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + handler->prio = ft_prio - dev->flow_db.prios; + + ft_prio->flow_table = ft; +free: + if (err) + kfree(handler); + kfree(match_c); + kfree(match_v); + return err ? ERR_PTR(err) : handler; +} + +enum { + LEFTOVERS_MC, + LEFTOVERS_UC, +}; + +static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_ucast = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + static struct { + struct ib_flow_attr flow_attr; + struct ib_flow_spec_eth eth_flow; + } leftovers_specs[] = { + [LEFTOVERS_MC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {0x1} } + } + }, + [LEFTOVERS_UC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {} } + } + } + }; + + handler = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_MC].flow_attr, + dst); + if (!IS_ERR(handler) && + flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { + handler_ucast = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_UC].flow_attr, + dst); + if (IS_ERR(handler_ucast)) { + kfree(handler); + handler = handler_ucast; + } else { + list_add(&handler_ucast->list, &handler->list); + } + } + + return handler; +} + +static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_flow_handler *handler = NULL; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio; + int err; + + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOSPC); + + if (domain != IB_FLOW_DOMAIN_USER || + flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || + flow_attr->flags) + return ERR_PTR(-EINVAL); + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mutex_lock(&dev->flow_db.lock); + + ft_prio = get_flow_table(dev, flow_attr); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + handler = create_flow_rule(dev, ft_prio, flow_attr, + dst); + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + handler = create_leftovers_rule(dev, ft_prio, flow_attr, + dst); + } else { + err = -EINVAL; + goto destroy_ft; + } + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + handler = NULL; + goto destroy_ft; + } + + ft_prio->refcount++; + mutex_unlock(&dev->flow_db.lock); + kfree(dst); + + return &handler->ibflow; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db.lock); + kfree(dst); + kfree(handler); + return ERR_PTR(err); +} + static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -1304,6 +2078,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_pd(devr->p0); } +static u32 get_core_cap_flags(struct ib_device *ibdev) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); + u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type); + u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version); + u32 ret = 0; + + if (ll == IB_LINK_LAYER_INFINIBAND) + return RDMA_CORE_PORT_IBA_IB; + + if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) + return 0; + + if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP)) + return 0; + + if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP) + ret |= RDMA_CORE_PORT_IBA_ROCE; + + if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP) + ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + + return ret; +} + static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { @@ -1316,20 +2116,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->core_cap_flags = get_core_cap_flags(ibdev); immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } +static int mlx5_enable_roce(struct mlx5_ib_dev *dev) +{ + int err; + + dev->roce.nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->roce.nb); + if (err) + return err; + + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + goto err_unregister_netdevice_notifier; + + return 0; + +err_unregister_netdevice_notifier: + unregister_netdevice_notifier(&dev->roce.nb); + return err; +} + +static void mlx5_disable_roce(struct mlx5_ib_dev *dev) +{ + mlx5_nic_vport_disable_roce(dev->mdev); + unregister_netdevice_notifier(&dev->roce.nb); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; + enum rdma_link_layer ll; + int port_type_cap; int err; int i; - /* don't create IB instance over Eth ports, no RoCE yet! */ - if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + port_type_cap = MLX5_CAP_GEN(mdev, port_type); + ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); + + if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce)) return NULL; printk_once(KERN_INFO "%s", mlx5_version); @@ -1340,6 +2170,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->mdev = mdev; + rwlock_init(&dev->roce.netdev_lock); err = get_port_caps(dev); if (err) goto err_dealloc; @@ -1385,11 +2216,18 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; + dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; + if (ll == IB_LINK_LAYER_ETHERNET) + dev->ib_dev.get_netdev = mlx5_ib_get_netdev; dev->ib_dev.query_gid = mlx5_ib_query_gid; + dev->ib_dev.add_gid = mlx5_ib_add_gid; + dev->ib_dev.del_gid = mlx5_ib_del_gid; dev->ib_dev.query_pkey = mlx5_ib_query_pkey; dev->ib_dev.modify_device = mlx5_ib_modify_device; dev->ib_dev.modify_port = mlx5_ib_modify_port; @@ -1425,8 +2263,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; - dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; - dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; @@ -1440,15 +2277,30 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == + IB_LINK_LAYER_ETHERNET) { + dev->ib_dev.create_flow = mlx5_ib_create_flow; + dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + } err = init_node_data(dev); if (err) goto err_dealloc; + mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); + if (ll == IB_LINK_LAYER_ETHERNET) { + err = mlx5_enable_roce(dev); + if (err) + goto err_dealloc; + } + err = create_dev_resources(&dev->devr); if (err) - goto err_dealloc; + goto err_disable_roce; err = mlx5_ib_odp_init_one(dev); if (err) @@ -1485,6 +2337,10 @@ err_odp: err_rsrc: destroy_dev_resources(&dev->devr); +err_disable_roce: + if (ll == IB_LINK_LAYER_ETHERNET) + mlx5_disable_roce(dev); + err_dealloc: ib_dealloc_device((struct ib_device *)dev); @@ -1494,11 +2350,14 @@ err_dealloc: static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { struct mlx5_ib_dev *dev = context; + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); ib_unregister_device(&dev->ib_dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); + if (ll == IB_LINK_LAYER_ETHERNET) + mlx5_disable_roce(dev); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 22123b7..d2b9737 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -42,6 +42,7 @@ #include <linux/mlx5/qp.h> #include <linux/mlx5/srq.h> #include <linux/types.h> +#include <linux/mlx5/transobj.h> #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -55,6 +56,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ __LINE__, current->pid, ##arg) +#define field_avail(type, fld, sz) (offsetof(type, fld) + \ + sizeof(((type *)0)->fld) <= (sz)) +#define MLX5_IB_DEFAULT_UIDX 0xffffff +#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) + enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, @@ -62,7 +68,9 @@ enum { enum mlx5_ib_mmap_cmd { MLX5_IB_MMAP_REGULAR_PAGE = 0, - MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */ + MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, + /* 5 is chosen in order to be compatible with old versions of libmlx5 */ + MLX5_IB_MMAP_CORE_CLOCK = 5, }; enum { @@ -85,6 +93,15 @@ enum mlx5_ib_mad_ifc_flags { MLX5_MAD_IFC_NET_VIEW = 4, }; +enum { + MLX5_CROSS_CHANNEL_UUAR = 0, +}; + +enum { + MLX5_CQE_VERSION_V0, + MLX5_CQE_VERSION_V1, +}; + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -93,6 +110,9 @@ struct mlx5_ib_ucontext { */ struct mutex db_page_mutex; struct mlx5_uuar_info uuari; + u8 cqe_version; + /* Transport Domain number */ + u32 tdn; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -105,6 +125,36 @@ struct mlx5_ib_pd { u32 pdn; }; +#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) +#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) +#if (MLX5_IB_FLOW_LAST_PRIO <= 0) +#error "Invalid number of bypass priorities" +#endif +#define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) + +#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) +struct mlx5_ib_flow_prio { + struct mlx5_flow_table *flow_table; + unsigned int refcount; +}; + +struct mlx5_ib_flow_handler { + struct list_head list; + struct ib_flow ibflow; + unsigned int prio; + struct mlx5_flow_rule *rule; +}; + +struct mlx5_ib_flow_db { + struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + /* Protect flow steering bypass flow tables + * when add/del flow rules. + * only single add/removal of flow steering rule could be done + * simultaneously. + */ + struct mutex lock; +}; + /* Use macros here so that don't have to duplicate * enum ib_send_flags and enum ib_qp_type for low-level driver */ @@ -171,35 +221,70 @@ struct mlx5_ib_pfault { struct mlx5_pagefault mpfault; }; +struct mlx5_ib_ubuffer { + struct ib_umem *umem; + int buf_size; + u64 buf_addr; +}; + +struct mlx5_ib_qp_base { + struct mlx5_ib_qp *container_mibqp; + struct mlx5_core_qp mqp; + struct mlx5_ib_ubuffer ubuffer; +}; + +struct mlx5_ib_qp_trans { + struct mlx5_ib_qp_base base; + u16 xrcdn; + u8 alt_port; + u8 atomic_rd_en; + u8 resp_depth; +}; + +struct mlx5_ib_rq { + struct mlx5_ib_qp_base base; + struct mlx5_ib_wq *rq; + struct mlx5_ib_ubuffer ubuffer; + struct mlx5_db *doorbell; + u32 tirn; + u8 state; +}; + +struct mlx5_ib_sq { + struct mlx5_ib_qp_base base; + struct mlx5_ib_wq *sq; + struct mlx5_ib_ubuffer ubuffer; + struct mlx5_db *doorbell; + u32 tisn; + u8 state; +}; + +struct mlx5_ib_raw_packet_qp { + struct mlx5_ib_sq sq; + struct mlx5_ib_rq rq; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; - struct mlx5_core_qp mqp; + union { + struct mlx5_ib_qp_trans trans_qp; + struct mlx5_ib_raw_packet_qp raw_packet_qp; + }; struct mlx5_buf buf; struct mlx5_db db; struct mlx5_ib_wq rq; - u32 doorbell_qpn; u8 sq_signal_bits; u8 fm_cache; - int sq_max_wqes_per_wr; - int sq_spare_wqes; struct mlx5_ib_wq sq; - struct ib_umem *umem; - int buf_size; - /* serialize qp state modifications */ struct mutex mutex; - u16 xrcdn; u32 flags; u8 port; - u8 alt_port; - u8 atomic_rd_en; - u8 resp_depth; u8 state; - int mlx_type; int wq_sig; int scat_cqe; int max_inline_data; @@ -242,9 +327,13 @@ struct mlx5_ib_cq_buf { enum mlx5_ib_qp_flags { MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, + MLX5_IB_QP_CROSS_CHANNEL = 1 << 2, + MLX5_IB_QP_MANAGED_SEND = 1 << 3, + MLX5_IB_QP_MANAGED_RECV = 1 << 4, }; struct mlx5_umr_wr { + struct ib_send_wr wr; union { u64 virt_addr; u64 offset; @@ -257,6 +346,11 @@ struct mlx5_umr_wr { u32 mkey; }; +static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct mlx5_umr_wr, wr); +} + struct mlx5_shared_mr_info { int mr_id; struct ib_umem *umem; @@ -278,6 +372,7 @@ struct mlx5_ib_cq { struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; + u32 create_flags; }; struct mlx5_ib_srq { @@ -313,6 +408,11 @@ enum mlx5_ib_mtt_access_flags { struct mlx5_ib_mr { struct ib_mr ibmr; + void *descs; + dma_addr_t desc_map; + int ndescs; + int max_descs; + int desc_size; struct mlx5_core_mr mmr; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; @@ -324,12 +424,7 @@ struct mlx5_ib_mr { struct mlx5_create_mkey_mbox_out out; struct mlx5_core_sig_ctx *sig; int live; -}; - -struct mlx5_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - __be64 *mapped_page_list; - dma_addr_t map; + void *descs_alloc; }; struct mlx5_ib_umr_context { @@ -358,20 +453,6 @@ enum { MLX5_FMR_BUSY, }; -struct mlx5_ib_fmr { - struct ib_fmr ibfmr; - struct mlx5_core_mr mr; - int access_flags; - int state; - /* protect fmr state - */ - spinlock_t lock; - u64 wrid; - struct ib_send_wr wr[2]; - u8 page_shift; - struct ib_fast_reg_page_list page_list; -}; - struct mlx5_cache_ent { struct list_head head; /* sync access to the cahce entry @@ -415,9 +496,19 @@ struct mlx5_ib_resources { struct ib_srq *s1; }; +struct mlx5_roce { + /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL + * netdev pointer + */ + rwlock_t netdev_lock; + struct net_device *netdev; + struct notifier_block nb; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; + struct mlx5_roce roce; MLX5_DECLARE_DOORBELL_LOCK(uar_lock); int num_ports; /* serialize update of capability mask @@ -439,6 +530,7 @@ struct mlx5_ib_dev { */ struct srcu_struct mr_srcu; #endif + struct mlx5_ib_flow_db flow_db; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -456,11 +548,6 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } -static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr); -} - static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx5_ib_cq, ibcq); @@ -468,7 +555,7 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) { - return container_of(mqp, struct mlx5_ib_qp, mqp); + return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) @@ -501,11 +588,6 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx5_ib_mr, ibmr); } -static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) -{ - return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl); -} - struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; @@ -525,8 +607,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, - struct mlx5_ib_ah *ah); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx5_ib_destroy_ah(struct ib_ah *ah); @@ -553,7 +633,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length); + void *buffer, u32 length, + struct mlx5_ib_qp_base *base); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -573,15 +654,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); -struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len); -void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); -struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc, - struct ib_fmr_attr *fmr_attr); -int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int npages, u64 iova); -int mlx5_ib_unmap_fmr(struct list_head *fmr_list); -int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr); +int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, @@ -661,6 +736,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, + int index); + static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -686,4 +764,28 @@ static inline int is_qp1(enum ib_qp_type qp_type) #define MLX5_MAX_UMR_SHIFT 16 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) +static inline u32 check_cq_create_flags(u32 flags) +{ + /* + * It returns non-zero value for unsupported CQ + * create flags, otherwise it returns zero. + */ + return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | + IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); +} + +static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx, + u32 *user_index) +{ + if (cqe_version) { + if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) || + (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK)) + return -EINVAL; + *user_index = cmd_uidx; + } else { + *user_index = MLX5_IB_DEFAULT_UIDX; + } + + return 0; +} #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 54a15b5..6000f7a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -381,7 +381,19 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) } } } else if (ent->cur > 2 * ent->limit) { - if (!someone_adding(cache) && + /* + * The remove_keys() logic is performed as garbage collection + * task. Such task is intended to be run when no other active + * processes are running. + * + * The need_resched() will return TRUE if there are user tasks + * to be activated in near future. + * + * In such case, we don't execute remove_keys() and postpone + * the garbage collection work to try to run in next cycle, + * in order to free CPU resources to other tasks. + */ + if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { remove_keys(dev, i, 1); if (ent->cur > ent->limit) @@ -687,7 +699,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); sg->addr = dma; sg->length = ALIGN(sizeof(u64) * n, 64); @@ -715,7 +727,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, struct ib_send_wr *wr, u32 key) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE; wr->opcode = MLX5_IB_WR_UMR; @@ -752,7 +764,8 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct ib_send_wr wr, *bad; + struct mlx5_umr_wr umrwr; + struct ib_send_wr *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; int size; @@ -798,14 +811,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, goto free_pas; } - memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift, - virt_addr, len, access_flags); + memset(&umrwr, 0, sizeof(umrwr)); + umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + page_shift, virt_addr, len, access_flags); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { mlx5_ib_warn(dev, "post send failed, err %d\n", err); goto unmap_dma; @@ -851,8 +864,8 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int size; __be64 *pas; dma_addr_t dma; - struct ib_send_wr wr, *bad; - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg; + struct ib_send_wr *bad; + struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64); @@ -917,26 +930,26 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; + wr.wr.wr_id = (u64)(unsigned long)&umr_context; sg.addr = dma; sg.length = ALIGN(npages * sizeof(u64), MLX5_UMR_MTT_ALIGNMENT); sg.lkey = dev->umrc.pd->local_dma_lkey; - wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | MLX5_IB_SEND_UMR_UPDATE_MTT; - wr.sg_list = &sg; - wr.num_sge = 1; - wr.opcode = MLX5_IB_WR_UMR; - umrwr->npages = sg.length / sizeof(u64); - umrwr->page_shift = PAGE_SHIFT; - umrwr->mkey = mr->mmr.key; - umrwr->target.offset = start_page_index; + wr.wr.sg_list = &sg; + wr.wr.num_sge = 1; + wr.wr.opcode = MLX5_IB_WR_UMR; + wr.npages = sg.length / sizeof(u64); + wr.page_shift = PAGE_SHIFT; + wr.mkey = mr->mmr.key; + wr.target.offset = start_page_index; mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &wr.wr, &bad); if (err) { mlx5_ib_err(dev, "UMR post send failed, err %d\n", err); } else { @@ -1122,16 +1135,17 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct ib_send_wr wr, *bad; + struct mlx5_umr_wr umrwr; + struct ib_send_wr *bad; int err; - memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + memset(&umrwr.wr, 0, sizeof(umrwr)); + umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { up(&umrc->sem); mlx5_ib_dbg(dev, "err %d\n", err); @@ -1151,6 +1165,52 @@ error: return err; } +static int +mlx5_alloc_priv_descs(struct ib_device *device, + struct mlx5_ib_mr *mr, + int ndescs, + int desc_size) +{ + int size = ndescs * desc_size; + int add_size; + int ret; + + add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + + mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); + if (!mr->descs_alloc) + return -ENOMEM; + + mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); + + mr->desc_map = dma_map_single(device->dma_device, mr->descs, + size, DMA_TO_DEVICE); + if (dma_mapping_error(device->dma_device, mr->desc_map)) { + ret = -ENOMEM; + goto err; + } + + return 0; +err: + kfree(mr->descs_alloc); + + return ret; +} + +static void +mlx5_free_priv_descs(struct mlx5_ib_mr *mr) +{ + if (mr->descs) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_descs * mr->desc_size; + + dma_unmap_single(device->dma_device, mr->desc_map, + size, DMA_TO_DEVICE); + kfree(mr->descs_alloc); + mr->descs = NULL; + } +} + static int clean_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); @@ -1170,6 +1230,8 @@ static int clean_mr(struct mlx5_ib_mr *mr) mr->sig = NULL; } + mlx5_free_priv_descs(mr); + if (!umred) { err = destroy_mkey(dev, mr); if (err) { @@ -1259,6 +1321,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (mr_type == IB_MR_TYPE_MEM_REG) { access_mode = MLX5_ACCESS_MODE_MTT; in->seg.log2_page_size = PAGE_SHIFT; + + err = mlx5_alloc_priv_descs(pd->device, mr, + ndescs, sizeof(u64)); + if (err) + goto err_free_in; + + mr->desc_size = sizeof(u64); + mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; @@ -1315,6 +1385,7 @@ err_destroy_psv: mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); } + mlx5_free_priv_descs(mr); err_free_sig: kfree(mr->sig); err_free_in: @@ -1324,48 +1395,6 @@ err_free: return ERR_PTR(err); } -struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len) -{ - struct mlx5_ib_fast_reg_page_list *mfrpl; - int size = page_list_len * sizeof(u64); - - mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL); - if (!mfrpl) - return ERR_PTR(-ENOMEM); - - mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); - if (!mfrpl->ibfrpl.page_list) - goto err_free; - - mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device, - size, &mfrpl->map, - GFP_KERNEL); - if (!mfrpl->mapped_page_list) - goto err_free; - - WARN_ON(mfrpl->map & 0x3f); - - return &mfrpl->ibfrpl; - -err_free: - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); - return ERR_PTR(-ENOMEM); -} - -void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); - struct mlx5_ib_dev *dev = to_mdev(page_list->device); - int size = page_list->max_page_list_len * sizeof(u64); - - dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list, - mfrpl->map); - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); -} - int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status) { @@ -1406,3 +1435,39 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, done: return ret; } + +static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + __be64 *descs; + + if (unlikely(mr->ndescs == mr->max_descs)) + return -ENOMEM; + + descs = mr->descs; + descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); + + return 0; +} + +int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int n; + + mr->ndescs = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, + mr->desc_size * mr->max_descs, + DMA_TO_DEVICE); + + n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); + + ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, + mr->desc_size * mr->max_descs, + DMA_TO_DEVICE); + + return n; +} diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index aa8391e..b8d7636 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -153,14 +153,16 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault, - int error) { + int error) +{ struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device); - int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn, + u32 qpn = qp->trans_qp.base.mqp.qpn; + int ret = mlx5_core_page_fault_resume(dev->mdev, + qpn, pfault->mpfault.flags, error); if (ret) - pr_err("Failed to resolve the page fault on QP 0x%x\n", - qp->mqp.qpn); + pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn); } /* @@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( #if defined(DEBUG) u32 ctrl_wqe_index, ctrl_qpn; #endif + u32 qpn = qp->trans_qp.base.mqp.qpn; ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; if (ds * MLX5_WQE_DS_UNITS > wqe_length) { @@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (ds == 0) { mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n", - wqe_index, qp->mqp.qpn); + wqe_index, qpn); return -EFAULT; } @@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler( MLX5_WQE_CTRL_WQE_INDEX_SHIFT; if (wqe_index != ctrl_wqe_index) { mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n", - wqe_index, qp->mqp.qpn, + wqe_index, qpn, ctrl_wqe_index); return -EFAULT; } ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >> MLX5_WQE_CTRL_QPN_SHIFT; - if (qp->mqp.qpn != ctrl_qpn) { + if (qpn != ctrl_qpn) { mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n", - wqe_index, qp->mqp.qpn, + wqe_index, qpn, ctrl_qpn); return -EFAULT; } @@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp, int resume_with_error = 0; u16 wqe_index = pfault->mpfault.wqe.wqe_index; int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR; + u32 qpn = qp->trans_qp.base.mqp.qpn; buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) { @@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp, } ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, - PAGE_SIZE); + PAGE_SIZE, &qp->trans_qp.base); if (ret < 0) { mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n", - -ret, wqe_index, qp->mqp.qpn); + -ret, wqe_index, qpn); resume_with_error = 1; goto resolve_page_fault; } @@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp, resolve_page_fault: mlx5_ib_page_fault_resume(qp, pfault, resume_with_error); mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n", - qp->mqp.qpn, resume_with_error, pfault->mpfault.flags); + qpn, resume_with_error, + pfault->mpfault.flags); free_page((unsigned long)buffer); } @@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) qp->disable_page_faults = 1; spin_lock_init(&qp->disable_page_faults_lock); - qp->mqp.pfault_handler = mlx5_ib_pfault_handler; + qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler; for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i) INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6f521a3..34cb8e8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -32,6 +32,8 @@ #include <linux/module.h> #include <rdma/ib_umem.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" #include "user.h" @@ -64,7 +66,7 @@ static const u32 mlx5_ib_opcode[] = { [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, - [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR, + [IB_WR_REG_MR] = MLX5_OPCODE_UMR, [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, @@ -114,14 +116,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) * Return: the number of bytes copied, or an error code. */ int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length) + void *buffer, u32 length, + struct mlx5_ib_qp_base *base) { struct ib_device *ibdev = qp->ibqp.device; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq; size_t offset; size_t wq_end; - struct ib_umem *umem = qp->umem; + struct ib_umem *umem = base->ubuffer.umem; u32 first_copy_length; int wqe_length; int ret; @@ -172,8 +175,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; struct ib_event event; - if (type == MLX5_EVENT_TYPE_PATH_MIG) - to_mibqp(qp)->port = to_mibqp(qp)->alt_port; + if (type == MLX5_EVENT_TYPE_PATH_MIG) { + /* This event is only valid for trans_qps */ + to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port; + } if (ibqp->event_handler) { event.device = ibqp->device; @@ -265,8 +270,10 @@ static int sq_overhead(enum ib_qp_type qp_type) /* fall through */ case IB_QPT_RC: size += sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_atomic_seg) + - sizeof(struct mlx5_wqe_raddr_seg); + max(sizeof(struct mlx5_wqe_atomic_seg) + + sizeof(struct mlx5_wqe_raddr_seg), + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg)); break; case IB_QPT_XRC_TGT: @@ -274,9 +281,9 @@ static int sq_overhead(enum ib_qp_type qp_type) case IB_QPT_UC: size += sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_umr_ctrl_seg) + - sizeof(struct mlx5_mkey_seg); + max(sizeof(struct mlx5_wqe_raddr_seg), + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg)); break; case IB_QPT_UD: @@ -366,7 +373,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, static int set_user_buf_size(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct mlx5_ib_create_qp *ucmd) + struct mlx5_ib_create_qp *ucmd, + struct mlx5_ib_qp_base *base, + struct ib_qp_init_attr *attr) { int desc_sz = 1 << qp->sq.wqe_shift; @@ -391,8 +400,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, return -EINVAL; } - qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + - (qp->sq.wqe_cnt << 6); + if (attr->qp_type == IB_QPT_RAW_PACKET) { + base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; + qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6; + } else { + base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << 6); + } return 0; } @@ -578,8 +592,8 @@ static int to_mlx5_st(enum ib_qp_type type) case IB_QPT_SMI: return MLX5_QP_ST_QP0; case IB_QPT_GSI: return MLX5_QP_ST_QP1; case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; - case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; case IB_QPT_RAW_PACKET: + case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; case IB_QPT_MAX: default: return -EINVAL; } @@ -590,13 +604,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; } +static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, + struct ib_pd *pd, + unsigned long addr, size_t size, + struct ib_umem **umem, + int *npages, int *page_shift, int *ncont, + u32 *offset) +{ + int err; + + *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0); + if (IS_ERR(*umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + return PTR_ERR(*umem); + } + + mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL); + + err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + + mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", + addr, size, *npages, *page_shift, *ncont, *offset); + + return 0; + +err_umem: + ib_umem_release(*umem); + *umem = NULL; + + return err; +} + static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, + struct ib_qp_init_attr *attr, struct mlx5_create_qp_mbox_in **in, - struct mlx5_ib_create_qp_resp *resp, int *inlen) + struct mlx5_ib_create_qp_resp *resp, int *inlen, + struct mlx5_ib_qp_base *base) { struct mlx5_ib_ucontext *context; struct mlx5_ib_create_qp ucmd; + struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; int page_shift = 0; int uar_index; int npages; @@ -615,18 +667,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, /* * TBD: should come from the verbs when we have the API */ - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); - if (uuarn < 0) { - mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); - mlx5_ib_dbg(dev, "reverting to medium latency\n"); - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); + if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + /* In CROSS_CHANNEL CQ and QP must use the same UAR */ + uuarn = MLX5_CROSS_CHANNEL_UUAR; + else { + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); if (uuarn < 0) { - mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); - mlx5_ib_dbg(dev, "reverting to high latency\n"); - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to medium latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); if (uuarn < 0) { - mlx5_ib_warn(dev, "uuar allocation failed\n"); - return uuarn; + mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to high latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + if (uuarn < 0) { + mlx5_ib_warn(dev, "uuar allocation failed\n"); + return uuarn; + } } } } @@ -638,32 +695,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; - err = set_user_buf_size(dev, qp, &ucmd); + err = set_user_buf_size(dev, qp, &ucmd, base, attr); if (err) goto err_uuar; - if (ucmd.buf_addr && qp->buf_size) { - qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - qp->buf_size, 0, 0); - if (IS_ERR(qp->umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - err = PTR_ERR(qp->umem); + if (ucmd.buf_addr && ubuffer->buf_size) { + ubuffer->buf_addr = ucmd.buf_addr; + err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, + ubuffer->buf_size, + &ubuffer->umem, &npages, &page_shift, + &ncont, &offset); + if (err) goto err_uuar; - } } else { - qp->umem = NULL; - } - - if (qp->umem) { - mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, - &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", - ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); + ubuffer->umem = NULL; } *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; @@ -672,8 +717,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, err = -ENOMEM; goto err_umem; } - if (qp->umem) - mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); + if (ubuffer->umem) + mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, + (*in)->pas, 0); (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); (*in)->ctx.params2 = cpu_to_be32(offset << 6); @@ -704,29 +750,31 @@ err_free: kvfree(*in); err_umem: - if (qp->umem) - ib_umem_release(qp->umem); + if (ubuffer->umem) + ib_umem_release(ubuffer->umem); err_uuar: free_uuar(&context->uuari, uuarn); return err; } -static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp) +static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp, + struct mlx5_ib_qp_base *base) { struct mlx5_ib_ucontext *context; context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &qp->db); - if (qp->umem) - ib_umem_release(qp->umem); + if (base->ubuffer.umem) + ib_umem_release(base->ubuffer.umem); free_uuar(&context->uuari, qp->uuarn); } static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, - struct mlx5_create_qp_mbox_in **in, int *inlen) + struct mlx5_create_qp_mbox_in **in, int *inlen, + struct mlx5_ib_qp_base *base) { enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; struct mlx5_uuar_info *uuari; @@ -758,9 +806,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->rq.offset = 0; qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; - qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); + base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf); + err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); goto err_uuar; @@ -853,19 +901,304 @@ static int is_connected(enum ib_qp_type qp_type) return 0; } +static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, u32 tdn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + memset(in, 0, sizeof(in)); + + MLX5_SET(tisc, tisc, transport_domain, tdn); + + return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); +} + +static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq) +{ + mlx5_core_destroy_tis(dev->mdev, sq->tisn); +} + +static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, void *qpin, + struct ib_pd *pd) +{ + struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer; + __be64 *pas; + void *in; + void *sqc; + void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); + void *wq; + int inlen; + int err; + int page_shift = 0; + int npages; + int ncont = 0; + u32 offset = 0; + + err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size, + &sq->ubuffer.umem, &npages, &page_shift, + &ncont, &offset); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err_umem; + } + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); + MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); + MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, tis_num_0, sq->tisn); + + wq = MLX5_ADDR_OF(sqc, sqc, wq); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); + MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page)); + MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); + MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(wq, wq, page_offset, offset); + + pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); + mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); + + err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp); + + kvfree(in); + + if (err) + goto err_umem; + + return 0; + +err_umem: + ib_umem_release(sq->ubuffer.umem); + sq->ubuffer.umem = NULL; + + return err; +} + +static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq) +{ + mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); + ib_umem_release(sq->ubuffer.umem); +} + +static int get_rq_pas_size(void *qpc) +{ + u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; + u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); + u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); + u32 page_offset = MLX5_GET(qpc, qpc, page_offset); + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; + + return rq_num_pas * sizeof(u64); +} + +static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq, void *qpin) +{ + __be64 *pas; + __be64 *qp_pas; + void *in; + void *rqc; + void *wq; + void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); + int inlen; + int err; + u32 rq_pas_size = get_rq_pas_size(qpc); + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + MLX5_SET(rqc, rqc, vsd, 1); + MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, flush_in_error_en, 1); + MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); + MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); + + wq = MLX5_ADDR_OF(rqc, rqc, wq); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, end_padding_mode, + MLX5_GET(qpc, qpc, end_padding_mode)); + MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); + MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); + MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); + MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); + MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); + MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); + + pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); + qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); + memcpy(pas, qp_pas, rq_pas_size); + + err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp); + + kvfree(in); + + return err; +} + +static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq) +{ + mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp); +} + +static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq, u32 tdn) +{ + u32 *in; + void *tirc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); + MLX5_SET(tirc, tirc, transport_domain, tdn); + + err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); + + kvfree(in); + + return err; +} + +static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq) +{ + mlx5_core_destroy_tir(dev->mdev, rq->tirn); +} + +static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct mlx5_create_qp_mbox_in *in, + struct ib_pd *pd) +{ + struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + struct ib_uobject *uobj = pd->uobject; + struct ib_ucontext *ucontext = uobj->context; + struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); + int err; + u32 tdn = mucontext->tdn; + + if (qp->sq.wqe_cnt) { + err = create_raw_packet_qp_tis(dev, sq, tdn); + if (err) + return err; + + err = create_raw_packet_qp_sq(dev, sq, in, pd); + if (err) + goto err_destroy_tis; + + sq->base.container_mibqp = qp; + } + + if (qp->rq.wqe_cnt) { + err = create_raw_packet_qp_rq(dev, rq, in); + if (err) + goto err_destroy_sq; + + rq->base.container_mibqp = qp; + + err = create_raw_packet_qp_tir(dev, rq, tdn); + if (err) + goto err_destroy_rq; + } + + qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn : + rq->base.mqp.qpn; + + return 0; + +err_destroy_rq: + destroy_raw_packet_qp_rq(dev, rq); +err_destroy_sq: + if (!qp->sq.wqe_cnt) + return err; + destroy_raw_packet_qp_sq(dev, sq); +err_destroy_tis: + destroy_raw_packet_qp_tis(dev, sq); + + return err; +} + +static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + + if (qp->rq.wqe_cnt) { + destroy_raw_packet_qp_tir(dev, rq); + destroy_raw_packet_qp_rq(dev, rq); + } + + if (qp->sq.wqe_cnt) { + destroy_raw_packet_qp_sq(dev, sq); + destroy_raw_packet_qp_tis(dev, sq); + } +} + +static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp, + struct mlx5_ib_raw_packet_qp *raw_packet_qp) +{ + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + + sq->sq = &qp->sq; + rq->rq = &qp->rq; + sq->doorbell = &qp->db; + rq->doorbell = &qp->db; +} + static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_qp_base *base; struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; struct mlx5_ib_create_qp ucmd; int inlen = sizeof(*in); int err; + u32 uidx = MLX5_IB_DEFAULT_UIDX; + void *qpc; + + base = init_attr->qp_type == IB_QPT_RAW_PACKET ? + &qp->raw_packet_qp.rq.base : + &qp->trans_qp.base; - mlx5_ib_odp_create_qp(qp); + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + mlx5_ib_odp_create_qp(qp); mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); @@ -880,6 +1213,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } } + if (init_attr->create_flags & + (IB_QP_CREATE_CROSS_CHANNEL | + IB_QP_CREATE_MANAGED_SEND | + IB_QP_CREATE_MANAGED_RECV)) { + if (!MLX5_CAP_GEN(mdev, cd)) { + mlx5_ib_dbg(dev, "cross-channel isn't supported\n"); + return -EINVAL; + } + if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL) + qp->flags |= MLX5_IB_QP_CROSS_CHANNEL; + if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND) + qp->flags |= MLX5_IB_QP_MANAGED_SEND; + if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) + qp->flags |= MLX5_IB_QP_MANAGED_RECV; + } if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; @@ -889,6 +1237,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EFAULT; } + err = get_qp_user_index(to_mucontext(pd->uobject->context), + &ucmd, udata->inlen, &uidx); + if (err) + return err; + qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); } else { @@ -918,11 +1271,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, ucmd.sq_wqe_count, max_wqes); return -EINVAL; } - err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); + err = create_user_qp(dev, pd, qp, udata, init_attr, &in, + &resp, &inlen, base); if (err) mlx5_ib_dbg(dev, "err %d\n", err); } else { - err = create_kernel_qp(dev, init_attr, qp, &in, &inlen); + err = create_kernel_qp(dev, init_attr, qp, &in, &inlen, + base); if (err) mlx5_ib_dbg(dev, "err %d\n", err); } @@ -954,6 +1309,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); + if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER); + if (qp->flags & MLX5_IB_QP_MANAGED_SEND) + in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND); + if (qp->flags & MLX5_IB_QP_MANAGED_RECV) + in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV); + if (qp->scat_cqe && is_connected(init_attr->qp_type)) { int rcqe_sz; int scqe_sz; @@ -1018,26 +1380,35 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); - err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen); + if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) { + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + /* 0xffffff means we ask to work with cqe version 0 */ + MLX5_SET(qpc, qpc, user_index, uidx); + } + + if (init_attr->qp_type == IB_QPT_RAW_PACKET) { + qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; + raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); + err = create_raw_packet_qp(dev, qp, in, pd); + } else { + err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen); + } + if (err) { mlx5_ib_dbg(dev, "create qp failed\n"); goto err_create; } kvfree(in); - /* Hardware wants QPN written in big-endian order (after - * shifting) for send doorbell. Precompute this value to save - * a little bit when posting sends. - */ - qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); - qp->mqp.event = mlx5_ib_qp_event; + base->container_mibqp = qp; + base->mqp.event = mlx5_ib_qp_event; return 0; err_create: if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(pd, qp); + destroy_qp_user(pd, qp, base); else if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); @@ -1129,11 +1500,11 @@ static void get_cqs(struct mlx5_ib_qp *qp, case IB_QPT_UD: case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: + case IB_QPT_RAW_PACKET: *send_cq = to_mcq(qp->ibqp.send_cq); *recv_cq = to_mcq(qp->ibqp.recv_cq); break; - case IB_QPT_RAW_PACKET: case IB_QPT_MAX: default: *send_cq = NULL; @@ -1142,45 +1513,66 @@ static void get_cqs(struct mlx5_ib_qp *qp, } } +static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + u16 operation); + static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_cq *send_cq, *recv_cq; + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_modify_qp_mbox_in *in; int err; + base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? + &qp->raw_packet_qp.rq.base : + &qp->trans_qp.base; + in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) return; if (qp->state != IB_QPS_RESET) { - mlx5_ib_qp_disable_pagefaults(qp); - if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state), - MLX5_QP_STATE_RST, in, 0, &qp->mqp)) - mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", - qp->mqp.qpn); + if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { + mlx5_ib_qp_disable_pagefaults(qp); + err = mlx5_core_qp_modify(dev->mdev, + MLX5_CMD_OP_2RST_QP, in, 0, + &base->mqp); + } else { + err = modify_raw_packet_qp(dev, qp, + MLX5_CMD_OP_2RST_QP); + } + if (err) + mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n", + base->mqp.qpn); } get_cqs(qp, &send_cq, &recv_cq); if (qp->create_type == MLX5_QP_KERNEL) { mlx5_ib_lock_cqs(send_cq, recv_cq); - __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, + __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (send_cq != recv_cq) - __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + __mlx5_ib_cq_clean(send_cq, base->mqp.qpn, + NULL); mlx5_ib_unlock_cqs(send_cq, recv_cq); } - err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp); - if (err) - mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn); - kfree(in); + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + destroy_raw_packet_qp(dev, qp); + } else { + err = mlx5_core_destroy_qp(dev->mdev, &base->mqp); + if (err) + mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", + base->mqp.qpn); + } + kfree(in); if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(&get_pd(qp)->ibpd, qp); + destroy_qp_user(&get_pd(qp)->ibpd, qp, base); } static const char *ib_qp_type_str(enum ib_qp_type type) @@ -1225,6 +1617,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, if (pd) { dev = to_mdev(pd->device); + + if (init_attr->qp_type == IB_QPT_RAW_PACKET) { + if (!pd->uobject) { + mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); + return ERR_PTR(-EINVAL); + } else if (!to_mucontext(pd->uobject->context)->cqe_version) { + mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n"); + return ERR_PTR(-EINVAL); + } + } } else { /* being cautious here */ if (init_attr->qp_type != IB_QPT_XRC_TGT && @@ -1250,6 +1652,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, } /* fall through */ + case IB_QPT_RAW_PACKET: case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: @@ -1272,19 +1675,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, else if (is_qp1(init_attr->qp_type)) qp->ibqp.qp_num = 1; else - qp->ibqp.qp_num = qp->mqp.qpn; + qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", - qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn, + qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, + to_mcq(init_attr->recv_cq)->mcq.cqn, to_mcq(init_attr->send_cq)->mcq.cqn); - qp->xrcdn = xrcdn; + qp->trans_qp.xrcdn = xrcdn; break; case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: - case IB_QPT_RAW_PACKET: case IB_QPT_MAX: default: mlx5_ib_dbg(dev, "unsupported qp type %d\n", @@ -1318,12 +1721,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) dest_rd_atomic = attr->max_dest_rd_atomic; else - dest_rd_atomic = qp->resp_depth; + dest_rd_atomic = qp->trans_qp.resp_depth; if (attr_mask & IB_QP_ACCESS_FLAGS) access_flags = attr->qp_access_flags; else - access_flags = qp->atomic_rd_en; + access_flags = qp->trans_qp.atomic_rd_en; if (!dest_rd_atomic) access_flags &= IB_ACCESS_REMOTE_WRITE; @@ -1360,21 +1763,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) return rate + MLX5_STAT_RATE_OFFSET; } -static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, +static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, + struct mlx5_ib_sq *sq, u8 sl) +{ + void *in; + void *tisc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tis_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tis_in, in, bitmask.prio, 1); + + tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); + MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1)); + + err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr) { + enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port); int err; - path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; - path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; - if (attr_mask & IB_QP_PKEY_INDEX) path->pkey_index = attr->pkey_index; - path->grh_mlid = ah->src_path_bits & 0x7f; - path->rlid = cpu_to_be16(ah->dlid); - if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= dev->mdev->port_caps[port - 1].gid_table_len) { @@ -1383,7 +1807,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, dev->mdev->port_caps[port - 1].gid_table_len); return -EINVAL; } - path->grh_mlid |= 1 << 7; + } + + if (ll == IB_LINK_LAYER_ETHERNET) { + if (!(ah->ah_flags & IB_AH_GRH)) + return -EINVAL; + memcpy(path->rmac, ah->dmac, sizeof(ah->dmac)); + path->udp_sport = mlx5_get_roce_udp_sport(dev, port, + ah->grh.sgid_index); + path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4; + } else { + path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : + 0; + path->rlid = cpu_to_be16(ah->dlid); + path->grh_mlid = ah->src_path_bits & 0x7f; + if (ah->ah_flags & IB_AH_GRH) + path->grh_mlid |= 1 << 7; + path->dci_cfi_prio_sl = ah->sl & 0xf; + } + + if (ah->ah_flags & IB_AH_GRH) { path->mgid_index = ah->grh.sgid_index; path->hop_limit = ah->grh.hop_limit; path->tclass_flowlabel = @@ -1401,7 +1845,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, if (attr_mask & IB_QP_TIMEOUT) path->ackto_lt = attr->timeout << 3; - path->sl = ah->sl & 0xf; + if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) + return modify_raw_packet_eth_prio(dev->mdev, + &qp->raw_packet_qp.sq, + ah->sl & 0xf); return 0; } @@ -1549,12 +1996,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask) return result; } +static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev, + struct mlx5_ib_rq *rq, int new_state) +{ + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_rq_in, in, rq_state, rq->state); + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + MLX5_SET(rqc, rqc, state, new_state); + + err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen); + if (err) + goto out; + + rq->state = new_state; + +out: + kvfree(in); + return err; +} + +static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, + struct mlx5_ib_sq *sq, int new_state) +{ + void *in; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_sq_in, in, sq_state, sq->state); + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + MLX5_SET(sqc, sqc, state, new_state); + + err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); + if (err) + goto out; + + sq->state = new_state; + +out: + kvfree(in); + return err; +} + +static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + u16 operation) +{ + struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + int rq_state; + int sq_state; + int err; + + switch (operation) { + case MLX5_CMD_OP_RST2INIT_QP: + rq_state = MLX5_RQC_STATE_RDY; + sq_state = MLX5_SQC_STATE_RDY; + break; + case MLX5_CMD_OP_2ERR_QP: + rq_state = MLX5_RQC_STATE_ERR; + sq_state = MLX5_SQC_STATE_ERR; + break; + case MLX5_CMD_OP_2RST_QP: + rq_state = MLX5_RQC_STATE_RST; + sq_state = MLX5_SQC_STATE_RST; + break; + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + /* Nothing to do here... */ + return 0; + default: + WARN_ON(1); + return -EINVAL; + } + + if (qp->rq.wqe_cnt) { + err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state); + if (err) + return err; + } + + if (qp->sq.wqe_cnt) + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); + + return 0; +} + static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { + [MLX5_QP_STATE_RST] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP, + }, + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP, + [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP, + }, + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP, + }, + [MLX5_QP_STATE_SQD] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + }, + [MLX5_QP_STATE_SQER] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP, + }, + [MLX5_QP_STATE_ERR] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + } + }; + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; struct mlx5_modify_qp_mbox_in *in; @@ -1564,6 +2153,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int sqd_event; int mlx5_st; int err; + u16 op; in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) @@ -1623,7 +2213,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context->pri_path.port = attr->port_num; if (attr_mask & IB_QP_AV) { - err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path, + err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port, attr_mask, 0, attr); if (err) @@ -1634,7 +2224,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context->pri_path.ackto_lt |= attr->timeout << 3; if (attr_mask & IB_QP_ALT_PATH) { - err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path, + err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, + &context->alt_path, attr->alt_port_num, attr_mask, 0, attr); if (err) goto out; @@ -1706,41 +2297,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, * again to RTS, and may cause the driver and the device to get out of * sync. */ if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && - (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) + (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) && + (qp->ibqp.qp_type != IB_QPT_RAW_PACKET)) mlx5_ib_qp_disable_pagefaults(qp); + if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE || + !optab[mlx5_cur][mlx5_new]) + goto out; + + op = optab[mlx5_cur][mlx5_new]; optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; in->optparam = cpu_to_be32(optpar); - err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state), - to_mlx5_state(new_state), in, sqd_event, - &qp->mqp); + + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) + err = modify_raw_packet_qp(dev, qp, op); + else + err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event, + &base->mqp); if (err) goto out; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT && + (qp->ibqp.qp_type != IB_QPT_RAW_PACKET)) mlx5_ib_qp_enable_pagefaults(qp); qp->state = new_state; if (attr_mask & IB_QP_ACCESS_FLAGS) - qp->atomic_rd_en = attr->qp_access_flags; + qp->trans_qp.atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - qp->resp_depth = attr->max_dest_rd_atomic; + qp->trans_qp.resp_depth = attr->max_dest_rd_atomic; if (attr_mask & IB_QP_PORT) qp->port = attr->port_num; if (attr_mask & IB_QP_ALT_PATH) - qp->alt_port = attr->alt_port_num; + qp->trans_qp.alt_port = attr->alt_port_num; /* * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && !ibqp->uobject) { - mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, + mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) - mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL); qp->rq.head = 0; qp->rq.tail = 0; @@ -1765,15 +2366,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_state cur_state, new_state; int err = -EINVAL; int port; + enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { + port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); + } + if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, - IB_LINK_LAYER_UNSPECIFIED)) + ll)) goto out; if ((attr_mask & IB_QP_PORT) && @@ -1838,9 +2445,9 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { - memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av)); - dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV); - dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); } static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) @@ -1896,22 +2503,24 @@ static __be64 sig_mkey_mask(void) return cpu_to_be64(result); } -static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int li) +static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, + struct mlx5_ib_mr *mr) { - memset(umr, 0, sizeof(*umr)); - - if (li) { - umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - umr->flags = 1 << 7; - return; - } + int ndescs = mr->ndescs; - umr->flags = (1 << 5); /* fail if not free */ - umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len); + memset(umr, 0, sizeof(*umr)); + umr->flags = MLX5_UMR_CHECK_NOT_FREE; + umr->klm_octowords = get_klm_octo(ndescs); umr->mkey_mask = frwr_mkey_mask(); } +static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) +{ + memset(umr, 0, sizeof(*umr)); + umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr->flags = 1 << 7; +} + static __be64 get_umr_reg_mr_mask(void) { u64 result; @@ -1952,7 +2561,7 @@ static __be64 get_umr_update_mtt_mask(void) static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(umr, 0, sizeof(*umr)); @@ -1987,29 +2596,31 @@ static u8 get_umr_flags(int acc) MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; } -static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, - int li, int *writ) +static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, + struct mlx5_ib_mr *mr, + u32 key, int access) { - memset(seg, 0, sizeof(*seg)); - if (li) { - seg->status = MLX5_MKEY_STATUS_FREE; - return; - } + int ndescs = ALIGN(mr->ndescs, 8) >> 1; - seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | - MLX5_ACCESS_MODE_MTT; - *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); - seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); + memset(seg, 0, sizeof(*seg)); + seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT; + seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); - seg->len = cpu_to_be64(wr->wr.fast_reg.length); - seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2); - seg->log2_page_size = wr->wr.fast_reg.page_shift; + seg->start_addr = cpu_to_be64(mr->ibmr.iova); + seg->len = cpu_to_be64(mr->ibmr.length); + seg->xlt_oct_size = cpu_to_be32(ndescs); + seg->log2_page_size = ilog2(mr->ibmr.page_size); +} + +static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) +{ + memset(seg, 0, sizeof(*seg)); + seg->status = MLX5_MKEY_STATUS_FREE; } static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { @@ -2028,21 +2639,14 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w mlx5_mkey_variant(umrwr->mkey)); } -static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, - struct ib_send_wr *wr, - struct mlx5_core_dev *mdev, - struct mlx5_ib_pd *pd, - int writ) +static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, + struct mlx5_ib_mr *mr, + struct mlx5_ib_pd *pd) { - struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); - u64 *page_list = wr->wr.fast_reg.page_list->page_list; - u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0); - int i; + int bcount = mr->desc_size * mr->ndescs; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) - mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); - dseg->addr = cpu_to_be64(mfrpl->map); - dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64)); + dseg->addr = cpu_to_be64(mr->desc_map); + dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } @@ -2224,22 +2828,22 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, - void **seg, int *size) +static int set_sig_data_segment(struct ib_sig_handover_wr *wr, + struct mlx5_ib_qp *qp, void **seg, int *size) { - struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; - struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct ib_sig_attrs *sig_attrs = wr->sig_attrs; + struct ib_mr *sig_mr = wr->sig_mr; struct mlx5_bsf *bsf; - u32 data_len = wr->sg_list->length; - u32 data_key = wr->sg_list->lkey; - u64 data_va = wr->sg_list->addr; + u32 data_len = wr->wr.sg_list->length; + u32 data_key = wr->wr.sg_list->lkey; + u64 data_va = wr->wr.sg_list->addr; int ret; int wqe_size; - if (!wr->wr.sig_handover.prot || - (data_key == wr->wr.sig_handover.prot->lkey && - data_va == wr->wr.sig_handover.prot->addr && - data_len == wr->wr.sig_handover.prot->length)) { + if (!wr->prot || + (data_key == wr->prot->lkey && + data_va == wr->prot->addr && + data_len == wr->prot->length)) { /** * Source domain doesn't contain signature information * or data and protection are interleaved in memory. @@ -2273,8 +2877,8 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, struct mlx5_stride_block_ctrl_seg *sblock_ctrl; struct mlx5_stride_block_entry *data_sentry; struct mlx5_stride_block_entry *prot_sentry; - u32 prot_key = wr->wr.sig_handover.prot->lkey; - u64 prot_va = wr->wr.sig_handover.prot->addr; + u32 prot_key = wr->prot->lkey; + u64 prot_va = wr->prot->addr; u16 block_size = sig_attrs->mem.sig.dif.pi_interval; int prot_size; @@ -2326,16 +2930,16 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - struct ib_send_wr *wr, u32 nelements, + struct ib_sig_handover_wr *wr, u32 nelements, u32 length, u32 pdn) { - struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct ib_mr *sig_mr = wr->sig_mr; u32 sig_key = sig_mr->rkey; u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + seg->flags = get_umr_flags(wr->access_flags) | MLX5_ACCESS_MODE_KLM; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | @@ -2346,7 +2950,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, } static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, u32 nelements) + u32 nelements) { memset(umr, 0, sizeof(*umr)); @@ -2357,37 +2961,37 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, } -static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, +static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, void **seg, int *size) { - struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); u32 pdn = get_pd(qp)->pdn; u32 klm_oct_size; int region_len, ret; - if (unlikely(wr->num_sge != 1) || - unlikely(wr->wr.sig_handover.access_flags & - IB_ACCESS_REMOTE_ATOMIC) || + if (unlikely(wr->wr.num_sge != 1) || + unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ - region_len = wr->sg_list->length; - if (wr->wr.sig_handover.prot && - (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey || - wr->wr.sig_handover.prot->addr != wr->sg_list->addr || - wr->wr.sig_handover.prot->length != wr->sg_list->length)) - region_len += wr->wr.sig_handover.prot->length; + region_len = wr->wr.sg_list->length; + if (wr->prot && + (wr->prot->lkey != wr->wr.sg_list->lkey || + wr->prot->addr != wr->wr.sg_list->addr || + wr->prot->length != wr->wr.sg_list->length)) + region_len += wr->prot->length; /** * KLM octoword size - if protection was provided * then we use strided block format (3 octowords), * else we use single KLM (1 octoword) **/ - klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + klm_oct_size = wr->prot ? 3 : 1; - set_sig_umr_segment(*seg, wr, klm_oct_size); + set_sig_umr_segment(*seg, klm_oct_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) @@ -2433,38 +3037,52 @@ static int set_psv_wr(struct ib_sig_domain *domain, return 0; } -static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, - struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) +static int set_reg_wr(struct mlx5_ib_qp *qp, + struct ib_reg_wr *wr, + void **seg, int *size) { - int writ = 0; - int li; + struct mlx5_ib_mr *mr = to_mmr(wr->mr); + struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0; - if (unlikely(wr->send_flags & IB_SEND_INLINE)) + if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Invalid IB_SEND_INLINE send flag\n"); return -EINVAL; + } - set_frwr_umr_segment(*seg, wr, li); + set_reg_umr_seg(*seg, mr); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - set_mkey_segment(*seg, wr, li, &writ); + + set_reg_mkey_seg(*seg, mr, wr->key, wr->access); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - if (!li) { - if (unlikely(wr->wr.fast_reg.page_list_len > - wr->wr.fast_reg.page_list->max_page_list_len)) - return -ENOMEM; - set_frwr_pages(*seg, wr, mdev, pd, writ); - *seg += sizeof(struct mlx5_wqe_data_seg); - *size += (sizeof(struct mlx5_wqe_data_seg) / 16); - } + set_reg_data_seg(*seg, mr, pd); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + return 0; } +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size) +{ + set_linv_umr_seg(*seg); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + set_linv_mkey_seg(*seg); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); +} + static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) { __be32 *p = NULL; @@ -2559,7 +3177,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp, ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | mlx5_opcode | ((u32)opmod << 24)); - ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); ctrl->fm_ce_se |= fence; qp->fm_cache = next_fence; if (unlikely(qp->wq_sig)) @@ -2578,7 +3196,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; @@ -2627,7 +3244,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; - xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num); seg += sizeof(*xrc); size += sizeof(*xrc) / 16; /* fall through */ @@ -2636,8 +3252,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(seg, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; @@ -2654,22 +3270,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } + set_linv_wr(qp, &seg, &size); num_sge = 0; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR; - ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); - err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); + qp->sq.wr_data[idx] = IB_WR_REG_MR; + ctrl->imm = cpu_to_be32(reg_wr(wr)->key); + err = set_reg_wr(qp, reg_wr(wr), &seg, &size); if (err) { - mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } @@ -2678,7 +3288,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_REG_SIG_MR: qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; - mr = to_mmr(wr->wr.sig_handover.sig_mr); + mr = to_mmr(sig_handover_wr(wr)->sig_mr); ctrl->imm = cpu_to_be32(mr->ibmr.rkey); err = set_sig_umr_wr(wr, qp, &seg, &size); @@ -2706,7 +3316,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem, mr->sig->psv_memory.psv_idx, &seg, &size); if (err) { @@ -2728,7 +3338,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); if (err) { @@ -2752,8 +3362,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(seg, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; @@ -2780,7 +3390,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; - ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); set_reg_umr_segment(seg, wr); seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; @@ -3000,7 +3610,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports)) return; - ib_ah_attr->sl = path->sl & 0xf; + ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf; ib_ah_attr->dlid = be16_to_cpu(path->rlid); ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f; @@ -3018,39 +3628,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at } } -int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, - struct ib_qp_init_attr *qp_init_attr) +static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u8 *sq_state) +{ + void *out; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_sq_out); + out = mlx5_vzalloc(inlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out); + if (err) + goto out; + + sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); + *sq_state = MLX5_GET(sqc, sqc, state); + sq->state = *sq_state; + +out: + kvfree(out); + return err; +} + +static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq, + u8 *rq_state) +{ + void *out; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_rq_out); + out = mlx5_vzalloc(inlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out); + if (err) + goto out; + + rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); + *rq_state = MLX5_GET(rqc, rqc, state); + rq->state = *rq_state; + +out: + kvfree(out); + return err; +} + +static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, + struct mlx5_ib_qp *qp, u8 *qp_state) +{ + static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = { + [MLX5_RQC_STATE_RST] = { + [MLX5_SQC_STATE_RST] = IB_QPS_RESET, + [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD, + [MLX5_SQ_STATE_NA] = IB_QPS_RESET, + }, + [MLX5_RQC_STATE_RDY] = { + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, + [MLX5_SQC_STATE_ERR] = IB_QPS_SQE, + [MLX5_SQ_STATE_NA] = MLX5_QP_STATE, + }, + [MLX5_RQC_STATE_ERR] = { + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_ERR] = IB_QPS_ERR, + [MLX5_SQ_STATE_NA] = IB_QPS_ERR, + }, + [MLX5_RQ_STATE_NA] = { + [MLX5_SQC_STATE_RST] = IB_QPS_RESET, + [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, + [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE, + [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD, + }, + }; + + *qp_state = sqrq_trans[rq_state][sq_state]; + + if (*qp_state == MLX5_QP_STATE_BAD) { + WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x", + qp->raw_packet_qp.sq.base.mqp.qpn, sq_state, + qp->raw_packet_qp.rq.base.mqp.qpn, rq_state); + return -EINVAL; + } + + if (*qp_state == MLX5_QP_STATE) + *qp_state = qp->state; + + return 0; +} + +static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + u8 *raw_packet_qp_state) +{ + struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + int err; + u8 sq_state = MLX5_SQ_STATE_NA; + u8 rq_state = MLX5_RQ_STATE_NA; + + if (qp->sq.wqe_cnt) { + err = query_raw_packet_qp_sq_state(dev, sq, &sq_state); + if (err) + return err; + } + + if (qp->rq.wqe_cnt) { + err = query_raw_packet_qp_rq_state(dev, rq, &rq_state); + if (err) + return err; + } + + return sqrq_state_to_qp_state(sq_state, rq_state, qp, + raw_packet_qp_state); +} + +static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_qp_attr *qp_attr) { - struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_query_qp_mbox_out *outb; struct mlx5_qp_context *context; int mlx5_state; int err = 0; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - /* - * Wait for any outstanding page faults, in case the user frees memory - * based upon this query's result. - */ - flush_workqueue(mlx5_ib_page_fault_wq); -#endif - - mutex_lock(&qp->mutex); outb = kzalloc(sizeof(*outb), GFP_KERNEL); - if (!outb) { - err = -ENOMEM; - goto out; - } + if (!outb) + return -ENOMEM; + context = &outb->ctx; - err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb)); + err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb, + sizeof(*outb)); if (err) - goto out_free; + goto out; mlx5_state = be32_to_cpu(context->flags) >> 28; qp->state = to_ib_qp_state(mlx5_state); - qp_attr->qp_state = qp->state; qp_attr->path_mtu = context->mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); @@ -3084,6 +3808,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7; qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3; + +out: + kfree(outb); + return err; +} + +int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + int err = 0; + u8 raw_packet_qp_state; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + /* + * Wait for any outstanding page faults, in case the user frees memory + * based upon this query's result. + */ + flush_workqueue(mlx5_ib_page_fault_wq); +#endif + + mutex_lock(&qp->mutex); + + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); + if (err) + goto out; + qp->state = raw_packet_qp_state; + qp_attr->port_num = 1; + } else { + err = query_qp_attr(dev, qp, qp_attr); + if (err) + goto out; + } + + qp_attr->qp_state = qp->state; qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; qp_attr->cap.max_recv_sge = qp->rq.max_gs; @@ -3107,12 +3868,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL; + if (qp->flags & MLX5_IB_QP_MANAGED_SEND) + qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; + if (qp->flags & MLX5_IB_QP_MANAGED_RECV) + qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; + qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; -out_free: - kfree(outb); - out: mutex_unlock(&qp->mutex); return err; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e008505..3b2ddd6 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -75,31 +75,42 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, struct mlx5_create_srq_mbox_in **in, - struct ib_udata *udata, int buf_size, int *inlen) + struct ib_udata *udata, int buf_size, int *inlen, + int is_xrc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_ib_create_srq ucmd; + struct mlx5_ib_create_srq ucmd = {}; size_t ucmdlen; + void *xsrqc; int err; int npages; int page_shift; int ncont; u32 offset; + u32 uidx = MLX5_IB_DEFAULT_UIDX; - ucmdlen = - (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < - sizeof(ucmd)) ? (sizeof(ucmd) - - sizeof(ucmd.reserved)) : sizeof(ucmd); + ucmdlen = min(udata->inlen, sizeof(ucmd)); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { mlx5_ib_dbg(dev, "failed copy udata\n"); return -EFAULT; } - if (ucmdlen == sizeof(ucmd) && - ucmd.reserved != 0) + if (ucmd.reserved0 || ucmd.reserved1) return -EINVAL; + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EINVAL; + + if (is_xrc) { + err = get_srq_user_index(to_mucontext(pd->uobject->context), + &ucmd, udata->inlen, &uidx); + if (err) + return err; + } + srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, @@ -138,6 +149,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); + if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && + is_xrc){ + xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, + xrc_srq_context_entry); + MLX5_SET(xrc_srqc, xsrqc, user_index, uidx); + } + return 0; err_in: @@ -151,13 +169,14 @@ err_umem: static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, struct mlx5_create_srq_mbox_in **in, int buf_size, - int *inlen) + int *inlen, int is_xrc) { int err; int i; struct mlx5_wqe_srq_next_seg *next; int page_shift; int npages; + void *xsrqc; err = mlx5_db_alloc(dev->mdev, &srq->db); if (err) { @@ -204,6 +223,14 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && + is_xrc){ + xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, + xrc_srq_context_entry); + /* 0xffffff means we ask to work with cqe version 0 */ + MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX); + } + return 0; err_in: @@ -275,10 +302,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); + is_xrc = (init_attr->srq_type == IB_SRQT_XRC); + if (pd->uobject) - err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen); + err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen, + is_xrc); else - err = create_srq_kernel(dev, srq, &in, buf_size, &inlen); + err = create_srq_kernel(dev, srq, &in, buf_size, &inlen, + is_xrc); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", @@ -286,7 +317,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - is_xrc = (init_attr->srq_type == IB_SRQT_XRC); in->ctx.state_log_sz = ilog2(srq->msrq.max); flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; xrcdn = 0; diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 76fb7b9..b94a554 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -35,6 +35,8 @@ #include <linux/types.h> +#include "mlx5_ib.h" + enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, @@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 { __u32 total_num_uuars; __u32 num_low_latency_uuars; __u32 flags; - __u32 reserved; + __u32 comp_mask; + __u8 max_cqe_version; + __u8 reserved0; + __u16 reserved1; + __u32 reserved2; +}; + +enum mlx5_ib_alloc_ucontext_resp_mask { + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, }; struct mlx5_ib_alloc_ucontext_resp { @@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp { __u32 max_recv_wr; __u32 max_srq_recv_wr; __u16 num_ports; - __u16 reserved; + __u16 reserved1; + __u32 comp_mask; + __u32 response_length; + __u8 cqe_version; + __u8 reserved2; + __u16 reserved3; + __u64 hca_core_clock_offset; }; struct mlx5_ib_alloc_pd_resp { @@ -110,7 +126,9 @@ struct mlx5_ib_create_srq { __u64 buf_addr; __u64 db_addr; __u32 flags; - __u32 reserved; /* explicit padding (optional on i386) */ + __u32 reserved0; /* explicit padding (optional on i386) */ + __u32 uidx; + __u32 reserved1; }; struct mlx5_ib_create_srq_resp { @@ -125,9 +143,48 @@ struct mlx5_ib_create_qp { __u32 rq_wqe_count; __u32 rq_wqe_shift; __u32 flags; + __u32 uidx; + __u32 reserved0; + __u64 sq_buf_addr; }; struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; + +static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, + struct mlx5_ib_create_qp *ucmd, + int inlen, + u32 *user_index) +{ + u8 cqe_version = ucontext->cqe_version; + + if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && + !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + return 0; + + if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != + !!cqe_version)) + return -EINVAL; + + return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); +} + +static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, + struct mlx5_ib_create_srq *ucmd, + int inlen, + u32 *user_index) +{ + u8 cqe_version = ucontext->cqe_version; + + if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && + !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + return 0; + + if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != + !!cqe_version)) + return -EINVAL; + + return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); +} #endif /* MLX5_IB_USER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 32f6c63..bcac294 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -281,7 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, ib_get_cached_gid(&dev->ib_dev, be32_to_cpu(ah->av->port_pd) >> 24, ah->av->gid_index % dev->limits.gid_table_len, - &header->grh.source_gid); + &header->grh.source_gid, NULL); memcpy(header->grh.destination_gid.raw, ah->av->dgid, 16); } diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 40ba833..a6531ff 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -608,9 +608,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->opcode = IB_WC_FETCH_ADD; entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; - case MTHCA_OPCODE_BIND_MW: - entry->opcode = IB_WC_BIND_MW; - break; default: entry->opcode = MTHCA_OPCODE_INVALID; break; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index dc2d48c..9866c35 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -898,89 +898,6 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) return &mr->ibmr; } -static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, - int acc, - u64 *iova_start) -{ - struct mthca_mr *mr; - u64 *page_list; - u64 total_size; - unsigned long mask; - int shift; - int npages; - int err; - int i, j, n; - - mask = buffer_list[0].addr ^ *iova_start; - total_size = 0; - for (i = 0; i < num_phys_buf; ++i) { - if (i != 0) - mask |= buffer_list[i].addr; - if (i != num_phys_buf - 1) - mask |= buffer_list[i].addr + buffer_list[i].size; - - total_size += buffer_list[i].size; - } - - if (mask & ~PAGE_MASK) - return ERR_PTR(-EINVAL); - - shift = __ffs(mask | 1 << 31); - - buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1); - buffer_list[0].addr &= ~0ull << shift; - - mr = kmalloc(sizeof *mr, GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); - - npages = 0; - for (i = 0; i < num_phys_buf; ++i) - npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift; - - if (!npages) - return &mr->ibmr; - - page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL); - if (!page_list) { - kfree(mr); - return ERR_PTR(-ENOMEM); - } - - n = 0; - for (i = 0; i < num_phys_buf; ++i) - for (j = 0; - j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift; - ++j) - page_list[n++] = buffer_list[i].addr + ((u64) j << shift); - - mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) " - "in PD %x; shift %d, npages %d.\n", - (unsigned long long) buffer_list[0].addr, - (unsigned long long) *iova_start, - to_mpd(pd)->pd_num, - shift, npages); - - err = mthca_mr_alloc_phys(to_mdev(pd->device), - to_mpd(pd)->pd_num, - page_list, shift, npages, - *iova_start, total_size, - convert_access(acc), mr); - - if (err) { - kfree(page_list); - kfree(mr); - return ERR_PTR(err); - } - - kfree(page_list); - mr->umem = NULL; - - return &mr->ibmr; -} - static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { @@ -1346,7 +1263,6 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.destroy_cq = mthca_destroy_cq; dev->ib_dev.poll_cq = mthca_poll_cq; dev->ib_dev.get_dma_mr = mthca_get_dma_mr; - dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; dev->ib_dev.get_port_immutable = mthca_port_immutable; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index e354b2f..96e5fb9 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1476,7 +1476,7 @@ void mthca_free_qp(struct mthca_dev *dev, /* Create UD header for an MLX send and build a data segment for it */ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, struct ib_send_wr *wr, + int ind, struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { @@ -1485,10 +1485,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, u16 pkey; ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, - mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, + mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0, &sqp->ud_header); - err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); + err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header); if (err) return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); @@ -1499,7 +1499,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, mlx->rlid = sqp->ud_header.lrh.destination_lid; mlx->vcrc = 0; - switch (wr->opcode) { + switch (wr->wr.opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -1507,7 +1507,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->ex.imm_data; + sqp->ud_header.immediate_data = wr->wr.ex.imm_data; break; default: return -EINVAL; @@ -1516,18 +1516,18 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - wr->wr.ud.pkey_index, &pkey); + wr->pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? - sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? + sqp->qkey : wr->remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, @@ -1569,34 +1569,34 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, } static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, - struct ib_send_wr *wr) + struct ib_atomic_wr *wr) { - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->compare = cpu_to_be64(wr->compare_add); } else { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->swap_add = cpu_to_be64(wr->compare_add); aseg->compare = 0; } } static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); - useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); - useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + useg->lkey = cpu_to_be32(to_mah(wr->ah)->key); + useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma); + useg->dqpn = cpu_to_be32(wr->remote_qpn); + useg->qkey = cpu_to_be32(wr->remote_qkey); } static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); - useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE); + useg->dqpn = cpu_to_be32(wr->remote_qpn); + useg->qkey = cpu_to_be32(wr->remote_qkey); } int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, @@ -1664,11 +1664,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -1677,8 +1677,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1694,8 +1694,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1708,13 +1708,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - set_tavor_ud_seg(wqe, wr); + set_tavor_ud_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mthca_tavor_ud_seg); size += sizeof (struct mthca_tavor_ud_seg) / 16; break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, wr, + err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), wqe - sizeof (struct mthca_next_seg), wqe); if (err) { @@ -2005,11 +2005,11 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -2018,8 +2018,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2035,8 +2035,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2049,13 +2049,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - set_arbel_ud_seg(wqe, wr); + set_arbel_ud_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mthca_arbel_ud_seg); size += sizeof (struct mthca_arbel_ud_seg) / 16; break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, wr, + err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), wqe - sizeof (struct mthca_next_seg), wqe); if (err) { diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 8a3ad17..cb9f0f2 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -134,7 +134,7 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16); /* External CM API Interface */ /* instance of function pointers for client API */ /* set address of this instance to cm_core->cm_ops at cm_core alloc */ -static struct nes_cm_ops nes_cm_api = { +static const struct nes_cm_ops nes_cm_api = { mini_cm_accelerated, mini_cm_listen, mini_cm_del_listen, @@ -3232,7 +3232,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int passive_state; struct nes_ib_device *nesibdev; struct ib_mr *ibmr = NULL; - struct ib_phys_buf ibphysbuf; struct nes_pd *nespd; u64 tagged_offset; u8 mpa_frame_offset = 0; @@ -3316,21 +3315,19 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) u64temp = (unsigned long)nesqp; nesibdev = nesvnic->nesibdev; nespd = nesqp->nespd; - ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset; - ibphysbuf.size = buff_len; tagged_offset = (u64)(unsigned long)*start_buff; - ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd, - &ibphysbuf, 1, - IB_ACCESS_LOCAL_WRITE, - &tagged_offset); - if (!ibmr) { + ibmr = nes_reg_phys_mr(&nespd->ibpd, + nesqp->ietf_frame_pbase + mpa_frame_offset, + buff_len, IB_ACCESS_LOCAL_WRITE, + &tagged_offset); + if (IS_ERR(ibmr)) { nes_debug(NES_DBG_CM, "Unable to register memory region" "for lSMM for cm_node = %p \n", cm_node); pci_free_consistent(nesdev->pcidev, nesqp->private_data_len + nesqp->ietf_frame_size, nesqp->ietf_frame, nesqp->ietf_frame_pbase); - return -ENOMEM; + return PTR_ERR(ibmr); } ibmr->pd = &nespd->ibpd; diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 32a6420..147c2c8 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -423,7 +423,7 @@ struct nes_cm_core { struct timer_list tcp_timer; - struct nes_cm_ops *api; + const struct nes_cm_ops *api; int (*post_event)(struct nes_cm_event *event); atomic_t events_posted; diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index d748e4b..c908020 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1200,12 +1200,6 @@ struct nes_fast_mr_wqe_pbl { dma_addr_t paddr; }; -struct nes_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - struct nes_fast_mr_wqe_pbl nes_wqe_pbl; - u64 pbl; -}; - struct nes_listener { struct work_struct work; struct workqueue_struct *wq; diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 2042c0f..6d3a169 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -727,7 +727,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti if (action == NES_ARP_DELETE) { nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index); nesadapter->arp_table[arp_index].ip_addr = 0; - memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN); + eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr); nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index); return arp_index; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 44cb513..8c4daf7 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -51,6 +51,7 @@ atomic_t qps_created; atomic_t sw_qps_destroyed; static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); +static int nes_dereg_mr(struct ib_mr *ib_mr); /** * nes_alloc_mw @@ -205,80 +206,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) } -/** - * nes_bind_mw - */ -static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, - struct ib_mw_bind *ibmw_bind) -{ - u64 u64temp; - struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); - struct nes_device *nesdev = nesvnic->nesdev; - /* struct nes_mr *nesmr = to_nesmw(ibmw); */ - struct nes_qp *nesqp = to_nesqp(ibqp); - struct nes_hw_qp_wqe *wqe; - unsigned long flags = 0; - u32 head; - u32 wqe_misc = 0; - u32 qsize; - - if (nesqp->ibqp_state > IB_QPS_RTS) - return -EINVAL; - - spin_lock_irqsave(&nesqp->lock, flags); - - head = nesqp->hwqp.sq_head; - qsize = nesqp->hwqp.sq_tail; - - /* Check for SQ overflow */ - if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { - spin_unlock_irqrestore(&nesqp->lock, flags); - return -ENOMEM; - } - - wqe = &nesqp->hwqp.sq_vbase[head]; - /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */ - nes_fill_init_qp_wqe(wqe, nesqp, head); - u64temp = ibmw_bind->wr_id; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp); - wqe_misc = NES_IWARP_SQ_OP_BIND; - - wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; - - if (ibmw_bind->send_flags & IB_SEND_SIGNALED) - wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; - - if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE) - wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE; - if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ) - wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ; - - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, - ibmw_bind->bind_info.mr->lkey); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX, - ibmw_bind->bind_info.length); - wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0; - u64temp = (u64)ibmw_bind->bind_info.addr; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp); - - head++; - if (head >= qsize) - head = 0; - - nesqp->hwqp.sq_head = head; - barrier(); - - nes_write32(nesdev->regs+NES_WQE_ALLOC, - (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); - - spin_unlock_irqrestore(&nesqp->lock, flags); - - return 0; -} - - /* * nes_alloc_fast_mr */ @@ -443,79 +370,46 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, } else { kfree(nesmr); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); + return ERR_PTR(-ENOMEM); } + + nesmr->pages = pci_alloc_consistent(nesdev->pcidev, + max_num_sg * sizeof(u64), + &nesmr->paddr); + if (!nesmr->paddr) + goto err; + + nesmr->max_pages = max_num_sg; + return ibmr; -} -/* - * nes_alloc_fast_reg_page_list - */ -static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list( - struct ib_device *ibdev, - int page_list_len) -{ - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct ib_fast_reg_page_list *pifrpl; - struct nes_ib_fast_reg_page_list *pnesfrpl; +err: + nes_dereg_mr(ibmr); - if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) - return ERR_PTR(-E2BIG); - /* - * Allocate the ib_fast_reg_page_list structure, the - * nes_fast_bpl structure, and the PLB table. - */ - pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) + - page_list_len * sizeof(u64), GFP_KERNEL); + return ERR_PTR(-ENOMEM); +} - if (!pnesfrpl) - return ERR_PTR(-ENOMEM); +static int nes_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct nes_mr *nesmr = to_nesmr(ibmr); - pifrpl = &pnesfrpl->ibfrpl; - pifrpl->page_list = &pnesfrpl->pbl; - pifrpl->max_page_list_len = page_list_len; - /* - * Allocate the WQE PBL - */ - pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev, - page_list_len * sizeof(u64), - &pnesfrpl->nes_wqe_pbl.paddr); + if (unlikely(nesmr->npages == nesmr->max_pages)) + return -ENOMEM; - if (!pnesfrpl->nes_wqe_pbl.kva) { - kfree(pnesfrpl); - return ERR_PTR(-ENOMEM); - } - nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, " - "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, " - "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl, - pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva, - (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr); + nesmr->pages[nesmr->npages++] = cpu_to_le64(addr); - return pifrpl; + return 0; } -/* - * nes_free_fast_reg_page_list - */ -static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl) +static int nes_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_ib_fast_reg_page_list *pnesfrpl; + struct nes_mr *nesmr = to_nesmr(ibmr); - pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl); - /* - * Free the WQE PBL. - */ - pci_free_consistent(nesdev->pcidev, - pifrpl->max_page_list_len * sizeof(u64), - pnesfrpl->nes_wqe_pbl.kva, - pnesfrpl->nes_wqe_pbl.paddr); - /* - * Free the PBL structure - */ - kfree(pnesfrpl); + nesmr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page); } /** @@ -2106,9 +2000,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, /** * nes_reg_phys_mr */ -static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, - struct ib_phys_buf *buffer_list, int num_phys_buf, int acc, - u64 * iova_start) +struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, + int acc, u64 *iova_start) { u64 region_length; struct nes_pd *nespd = to_nespd(ib_pd); @@ -2120,13 +2013,10 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, struct nes_vpbl vpbl; struct nes_root_vpbl root_vpbl; u32 stag; - u32 i; unsigned long mask; u32 stag_index = 0; u32 next_stag_index = 0; u32 driver_key = 0; - u32 root_pbl_index = 0; - u32 cur_pbl_index = 0; int err = 0; int ret = 0; u16 pbl_count = 0; @@ -2145,11 +2035,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, next_stag_index >>= 8; next_stag_index %= nesadapter->max_mr; - if (num_phys_buf > (1024*512)) { - return ERR_PTR(-E2BIG); - } - if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK) + if ((addr ^ *iova_start) & ~PAGE_MASK) return ERR_PTR(-EINVAL); err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, @@ -2164,84 +2051,33 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, return ERR_PTR(-ENOMEM); } - for (i = 0; i < num_phys_buf; i++) { + /* Allocate a 4K buffer for the PBL */ + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n", + vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_phys_err; + } - if ((i & 0x01FF) == 0) { - if (root_pbl_index == 1) { - /* Allocate the root PBL */ - root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192, - &root_vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", - root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); - if (!root_vpbl.pbl_vbase) { - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - kfree(nesmr); - return ERR_PTR(-ENOMEM); - } - root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); - if (!root_vpbl.leaf_vpbl) { - pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, - root_vpbl.pbl_pbase); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - kfree(nesmr); - return ERR_PTR(-ENOMEM); - } - root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[0].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); - root_vpbl.leaf_vpbl[0] = vpbl; - } - /* Allocate a 4K buffer for the PBL */ - vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n", - vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase); - if (!vpbl.pbl_vbase) { - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); - kfree(nesmr); - goto reg_phys_err; - } - /* Fill in the root table */ - if (1 <= root_pbl_index) { - root_vpbl.pbl_vbase[root_pbl_index].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[root_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); - root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; - } - root_pbl_index++; - cur_pbl_index = 0; - } - mask = !buffer_list[i].size; - if (i != 0) - mask |= buffer_list[i].addr; - if (i != num_phys_buf - 1) - mask |= buffer_list[i].addr + buffer_list[i].size; - - if (mask & ~PAGE_MASK) { - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n"); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_phys_err; - } + mask = !size; - region_length += buffer_list[i].size; - if ((i != 0) && (single_page)) { - if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr) - single_page = 0; - } - vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK); - vpbl.pbl_vbase[cur_pbl_index++].pa_high = - cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32))); + if (mask & ~PAGE_MASK) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_phys_err; } + region_length += size; + vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK); + vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32))); + stag = stag_index << 8; stag |= driver_key; stag += (u32)stag_key; @@ -2251,17 +2087,15 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index); /* Make the leaf PBL the root if only one PBL */ - if (root_pbl_index == 1) { - root_vpbl.pbl_pbase = vpbl.pbl_pbase; - } + root_vpbl.pbl_pbase = vpbl.pbl_pbase; if (single_page) { pbl_count = 0; } else { - pbl_count = root_pbl_index; + pbl_count = 1; } ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl, - buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start, + addr, pbl_count, 1, acc, iova_start, &nesmr->pbls_used, &nesmr->pbl_4k); if (ret == 0) { @@ -2274,21 +2108,9 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, ibmr = ERR_PTR(-ENOMEM); } - reg_phys_err: - /* free the resources */ - if (root_pbl_index == 1) { - /* single PBL case */ - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase); - } else { - for (i=0; i<root_pbl_index; i++) { - pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase, - root_vpbl.leaf_vpbl[i].pbl_pbase); - } - kfree(root_vpbl.leaf_vpbl); - pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, - root_vpbl.pbl_pbase); - } - +reg_phys_err: + /* single PBL case */ + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase); return ibmr; } @@ -2298,17 +2120,13 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, */ static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc) { - struct ib_phys_buf bl; u64 kva = 0; nes_debug(NES_DBG_MR, "\n"); - bl.size = (u64)0xffffffffffULL; - bl.addr = 0; - return nes_reg_phys_mr(pd, &bl, 1, acc, &kva); + return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva); } - /** * nes_reg_user_mr */ @@ -2683,6 +2501,13 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) u16 major_code; u16 minor_code; + + if (nesmr->pages) + pci_free_consistent(nesdev->pcidev, + nesmr->max_pages * sizeof(u64), + nesmr->pages, + nesmr->paddr); + if (nesmr->region) { ib_umem_release(nesmr->region); } @@ -3372,9 +3197,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - ib_wr->wr.rdma.rkey); + rdma_wr(ib_wr)->rkey); set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - ib_wr->wr.rdma.remote_addr); + rdma_wr(ib_wr)->remote_addr); if ((ib_wr->send_flags & IB_SEND_INLINE) && ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && @@ -3409,9 +3234,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, } set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - ib_wr->wr.rdma.remote_addr); + rdma_wr(ib_wr)->remote_addr); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - ib_wr->wr.rdma.rkey); + rdma_wr(ib_wr)->rkey); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, ib_wr->sg_list->length); set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, @@ -3425,19 +3250,13 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX, ib_wr->ex.invalidate_rkey); break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: { - int i; - int flags = ib_wr->wr.fast_reg.access_flags; - struct nes_ib_fast_reg_page_list *pnesfrpl = - container_of(ib_wr->wr.fast_reg.page_list, - struct nes_ib_fast_reg_page_list, - ibfrpl); - u64 *src_page_list = pnesfrpl->ibfrpl.page_list; - u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva; - - if (ib_wr->wr.fast_reg.page_list_len > - (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { + struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr); + int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size); + int flags = reg_wr(ib_wr)->access; + + if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n"); err = -EINVAL; break; @@ -3445,19 +3264,19 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc = NES_IWARP_SQ_OP_FAST_REG; set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, - ib_wr->wr.fast_reg.iova_start); + mr->ibmr.iova); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, - ib_wr->wr.fast_reg.length); + mr->ibmr.length); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, - ib_wr->wr.fast_reg.rkey); - /* Set page size: */ - if (ib_wr->wr.fast_reg.page_shift == 12) { + reg_wr(ib_wr)->key); + + if (page_shift == 12) { wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; - } else if (ib_wr->wr.fast_reg.page_shift == 21) { + } else if (page_shift == 21) { wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; } else { nes_debug(NES_DBG_IW_TX, "Invalid page shift," @@ -3465,6 +3284,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, err = -EINVAL; break; } + /* Set access_flags */ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ; if (flags & IB_ACCESS_LOCAL_WRITE) @@ -3480,35 +3300,22 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; /* Fill in PBL info: */ - if (ib_wr->wr.fast_reg.page_list_len > - pnesfrpl->ibfrpl.max_page_list_len) { - nes_debug(NES_DBG_IW_TX, "Invalid page list length," - " ib_wr=%p, value=%u, max=%u\n", - ib_wr, ib_wr->wr.fast_reg.page_list_len, - pnesfrpl->ibfrpl.max_page_list_len); - err = -EINVAL; - break; - } - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX, - pnesfrpl->nes_wqe_pbl.paddr); + mr->paddr); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, - ib_wr->wr.fast_reg.page_list_len * 8); - - for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++) - dst_page_list[i] = cpu_to_le64(src_page_list[i]); + mr->npages * 8); - nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " + nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, " "length: %d, rkey: %0x, pgl_paddr: %llx, " "page_list_len: %u, wqe_misc: %x\n", - (unsigned long long) ib_wr->wr.fast_reg.iova_start, - ib_wr->wr.fast_reg.length, - ib_wr->wr.fast_reg.rkey, - (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, - ib_wr->wr.fast_reg.page_list_len, + (unsigned long long) mr->ibmr.iova, + mr->ibmr.length, + reg_wr(ib_wr)->key, + (unsigned long long) mr->paddr, + mr->npages, wqe_misc); break; } @@ -3751,7 +3558,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) entry->opcode = IB_WC_LOCAL_INV; break; case NES_IWARP_SQ_OP_FAST_REG: - entry->opcode = IB_WC_FAST_REG_MR; + entry->opcode = IB_WC_REG_MR; break; } @@ -3931,16 +3738,13 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.destroy_cq = nes_destroy_cq; nesibdev->ibdev.poll_cq = nes_poll_cq; nesibdev->ibdev.get_dma_mr = nes_get_dma_mr; - nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr; nesibdev->ibdev.reg_user_mr = nes_reg_user_mr; nesibdev->ibdev.dereg_mr = nes_dereg_mr; nesibdev->ibdev.alloc_mw = nes_alloc_mw; nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; - nesibdev->ibdev.bind_mw = nes_bind_mw; nesibdev->ibdev.alloc_mr = nes_alloc_mr; - nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list; - nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list; + nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; nesibdev->ibdev.attach_mcast = nes_multicast_attach; nesibdev->ibdev.detach_mcast = nes_multicast_detach; diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 309b31c..7029088 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -79,6 +79,10 @@ struct nes_mr { u16 pbls_used; u8 mode; u8 pbl_4k; + __le64 *pages; + dma_addr_t paddr; + u32 max_pages; + u32 npages; }; struct nes_hw_pb { @@ -186,4 +190,8 @@ struct nes_qp { u8 pau_state; __u64 nesuqp_addr; }; + +struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, + u64 addr, u64 size, int acc, u64 *iova_start); + #endif /* NES_VERBS_H */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index b4091ab..12503f1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -55,7 +55,7 @@ #include <be_roce.h> #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0" +#define OCRDMA_ROCE_DRV_VERSION "11.0.0.0" #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" @@ -193,6 +193,8 @@ struct ocrdma_mr { struct ib_mr ibmr; struct ib_umem *umem; struct ocrdma_hw_mr hwmr; + u64 *pages; + u32 npages; }; struct ocrdma_stats { @@ -230,6 +232,10 @@ struct phy_info { u16 interface_type; }; +enum ocrdma_flags { + OCRDMA_FLAGS_LINK_STATUS_INIT = 0x01 +}; + struct ocrdma_dev { struct ib_device ibdev; struct ocrdma_dev_attr attr; @@ -278,7 +284,6 @@ struct ocrdma_dev { u32 hba_port_num; struct list_head entry; - struct rcu_head rcu; int id; u64 *stag_arr; u8 sl; /* service level */ @@ -286,6 +291,7 @@ struct ocrdma_dev { atomic_t update_sl; u16 pvid; u32 asic_id; + u32 flags; ulong last_stats_time; struct mutex stats_lock; /* provide synch for debugfs operations */ @@ -317,9 +323,6 @@ struct ocrdma_cq { */ u32 max_hw_cqe; bool phase_change; - bool deferred_arm, deferred_sol; - bool first_arm; - spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization * to cq polling */ @@ -590,4 +593,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state) (state & OCRDMA_STATE_FLAG_SYNC); } +static inline u8 ocrdma_get_ae_link_state(u32 ae_state) +{ + return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT); +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 44766fe..3790771 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -45,6 +45,7 @@ #include <rdma/ib_addr.h> #include <rdma/ib_mad.h> +#include <rdma/ib_cache.h> #include "ocrdma.h" #include "ocrdma_verbs.h" @@ -56,10 +57,9 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, union ib_gid *sgid, - int pdid, bool *isvlan) + int pdid, bool *isvlan, u16 vlan_tag) { int status = 0; - u16 vlan_tag; struct ocrdma_eth_vlan eth; struct ocrdma_grh grh; int eth_sz; @@ -68,7 +68,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, memset(&grh, 0, sizeof(grh)); /* VLAN */ - vlan_tag = attr->vlan_id; if (!vlan_tag || (vlan_tag > 0xFFF)) vlan_tag = dev->pvid; if (vlan_tag || dev->pfc_state) { @@ -115,9 +114,11 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) { u32 *ahid_addr; - bool isvlan = false; int status; struct ocrdma_ah *ah; + bool isvlan = false; + u16 vlan_tag = 0xffff; + struct ib_gid_attr sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); union ib_gid sgid; @@ -135,18 +136,26 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (status) goto av_err; - status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid); + status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid, + &sgid_attr); if (status) { pr_err("%s(): Failed to query sgid, status = %d\n", __func__, status); goto av_conf_err; } + if (sgid_attr.ndev) { + if (is_vlan_dev(sgid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); + dev_put(sgid_attr.ndev); + } if ((pd->uctx) && (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) { - status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, - attr->dmac, &attr->vlan_id); + status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid, + attr->dmac, &vlan_tag, + &sgid_attr.ndev->ifindex, + NULL); if (status) { pr_err("%s(): Failed to resolve dmac from gid." "status = %d\n", __func__, status); @@ -154,7 +163,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) } } - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan); + status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); if (status) goto av_conf_err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index aab391a..283ca84 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -47,6 +47,7 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_cache.h> #include "ocrdma.h" #include "ocrdma_hw.h" @@ -578,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE); cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE); + /* Request link events on this MQ. */ + cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE); cmd->async_cqid_ringsize = cq->id; cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << @@ -678,11 +681,33 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, int dev_event = 0; int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >> OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT; + u16 qpid = cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK; + u16 cqid = cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK; - if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) - qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK]; - if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) - cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; + /* + * Some FW version returns wrong qp or cq ids in CQEs. + * Checking whether the IDs are valid + */ + + if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) { + if (qpid < dev->attr.max_qp) + qp = dev->qp_tbl[qpid]; + if (qp == NULL) { + pr_err("ocrdma%d:Async event - qpid %u is not valid\n", + dev->id, qpid); + return; + } + } + + if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) { + if (cqid < dev->attr.max_cq) + cq = dev->cq_tbl[cqid]; + if (cq == NULL) { + pr_err("ocrdma%d:Async event - cqid %u is not valid\n", + dev->id, cqid); + return; + } + } memset(&ib_evt, 0, sizeof(ib_evt)); @@ -796,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, } } +static void ocrdma_process_link_state(struct ocrdma_dev *dev, + struct ocrdma_ae_mcqe *cqe) +{ + struct ocrdma_ae_lnkst_mcqe *evt; + u8 lstate; + + evt = (struct ocrdma_ae_lnkst_mcqe *)cqe; + lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn); + + if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK)) + return; + + if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT) + ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK)); +} + static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) { /* async CQE processing */ struct ocrdma_ae_mcqe *cqe = ae_cqe; u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >> OCRDMA_AE_MCQE_EVENT_CODE_SHIFT; - - if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE) + switch (evt_code) { + case OCRDMA_ASYNC_LINK_EVE_CODE: + ocrdma_process_link_state(dev, cqe); + break; + case OCRDMA_ASYNC_RDMA_EVE_CODE: ocrdma_dispatch_ibevent(dev, cqe); - else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE) + break; + case OCRDMA_ASYNC_GRP5_EVE_CODE: ocrdma_process_grp5_aync(dev, cqe); - else + break; + default: pr_err("%s(%d) invalid evt code=0x%x\n", __func__, dev->id, evt_code); + } } static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) @@ -1340,7 +1387,8 @@ mbx_err: return status; } -int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed, + u8 *lnk_state) { int status = -ENOMEM; struct ocrdma_get_link_speed_rsp *rsp; @@ -1361,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) goto mbx_err; rsp = (struct ocrdma_get_link_speed_rsp *)cmd; - *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) - >> OCRDMA_PHY_PS_SHIFT; + if (lnk_speed) + *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) + >> OCRDMA_PHY_PS_SHIFT; + if (lnk_state) + *lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK); mbx_err: kfree(cmd); @@ -2448,6 +2499,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; union ib_gid sgid, zgid; + struct ib_gid_attr sgid_attr; u32 vlan_id = 0xFFFF; u8 mac_addr[6]; struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); @@ -2466,10 +2518,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); - status = ocrdma_query_gid(&dev->ibdev, 1, - ah_attr->grh.sgid_index, &sgid); - if (status) - return status; + + status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index, + &sgid, &sgid_attr); + if (!status && sgid_attr.ndev) { + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(sgid_attr.ndev); + } memset(&zgid, 0, sizeof(zgid)); if (!memcmp(&sgid, &zgid, sizeof(zgid))) @@ -2486,17 +2542,16 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); - if (attr_mask & IB_QP_VID) { - vlan_id = attrs->vlan_id; - } else if (dev->pfc_state) { - vlan_id = 0; - pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", - dev->id); - pr_err("ocrdma%d:Using VLAN 0 for this connection\n", - dev->id); - } - if (vlan_id < 0x1000) { + if (vlan_id == 0xFFFF) + vlan_id = 0; + if (vlan_id || dev->pfc_state) { + if (!vlan_id) { + pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", + dev->id); + pr_err("ocrdma%d:Using VLAN 0 for this connection\n", + dev->id); + } cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 7ed885c..ebc1f44 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -106,7 +106,8 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed, bool solicited, u16 cqe_popped); /* verbs specific mailbox commands */ -int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed); +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed, + u8 *lnk_st); int ocrdma_query_config(struct ocrdma_dev *, struct ocrdma_mbx_query_config *config); @@ -153,5 +154,6 @@ char *port_speed_string(struct ocrdma_dev *dev); void ocrdma_init_service_level(struct ocrdma_dev *); void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev); void ocrdma_free_pd_range(struct ocrdma_dev *dev); +void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate); #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 87aa55d..f387430 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -63,8 +63,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("Dual BSD/GPL"); -static LIST_HEAD(ocrdma_dev_list); -static DEFINE_SPINLOCK(ocrdma_devlist_lock); static DEFINE_IDR(ocrdma_dev_id); void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) @@ -177,13 +175,11 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.req_notify_cq = ocrdma_arm_cq; dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; - dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr; dev->ibdev.dereg_mr = ocrdma_dereg_mr; dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; dev->ibdev.alloc_mr = ocrdma_alloc_mr; - dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list; - dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list; + dev->ibdev.map_mr_sg = ocrdma_map_mr_sg; /* mandatory to support user space verbs consumer. */ dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; @@ -232,6 +228,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev) ocrdma_alloc_pd_pool(dev); + if (!ocrdma_alloc_stats_resources(dev)) { + pr_err("%s: stats resource allocation failed\n", __func__); + goto alloc_err; + } + spin_lock_init(&dev->av_tbl.lock); spin_lock_init(&dev->flush_q_lock); return 0; @@ -242,6 +243,7 @@ alloc_err: static void ocrdma_free_resources(struct ocrdma_dev *dev) { + ocrdma_release_stats_resources(dev); kfree(dev->stag_arr); kfree(dev->qp_tbl); kfree(dev->cq_tbl); @@ -293,6 +295,7 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { int status = 0, i; + u8 lstate = 0; struct ocrdma_dev *dev; dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); @@ -322,12 +325,14 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) if (status) goto alloc_err; + /* Query Link state and update */ + status = ocrdma_mbx_get_link_speed(dev, NULL, &lstate); + if (!status) + ocrdma_update_link_state(dev, lstate); + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) goto sysfs_err; - spin_lock(&ocrdma_devlist_lock); - list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); - spin_unlock(&ocrdma_devlist_lock); /* Init stats */ ocrdma_add_port_stats(dev); /* Interrupt Moderation */ @@ -356,9 +361,8 @@ idr_err: return NULL; } -static void ocrdma_remove_free(struct rcu_head *rcu) +static void ocrdma_remove_free(struct ocrdma_dev *dev) { - struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu); idr_remove(&ocrdma_dev_id, dev->id); kfree(dev->mbx_cmd); @@ -375,18 +379,12 @@ static void ocrdma_remove(struct ocrdma_dev *dev) ib_unregister_device(&dev->ibdev); ocrdma_rem_port_stats(dev); - - spin_lock(&ocrdma_devlist_lock); - list_del_rcu(&dev->entry); - spin_unlock(&ocrdma_devlist_lock); - ocrdma_free_resources(dev); ocrdma_cleanup_hw(dev); - - call_rcu(&dev->rcu, ocrdma_remove_free); + ocrdma_remove_free(dev); } -static int ocrdma_open(struct ocrdma_dev *dev) +static int ocrdma_dispatch_port_active(struct ocrdma_dev *dev) { struct ib_event port_event; @@ -397,32 +395,9 @@ static int ocrdma_open(struct ocrdma_dev *dev) return 0; } -static int ocrdma_close(struct ocrdma_dev *dev) +static int ocrdma_dispatch_port_error(struct ocrdma_dev *dev) { - int i; - struct ocrdma_qp *qp, **cur_qp; struct ib_event err_event; - struct ib_qp_attr attrs; - int attr_mask = IB_QP_STATE; - - attrs.qp_state = IB_QPS_ERR; - mutex_lock(&dev->dev_lock); - if (dev->qp_tbl) { - cur_qp = dev->qp_tbl; - for (i = 0; i < OCRDMA_MAX_QP; i++) { - qp = cur_qp[i]; - if (qp && qp->ibqp.qp_type != IB_QPT_GSI) { - /* change the QP state to ERROR */ - _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask); - - err_event.event = IB_EVENT_QP_FATAL; - err_event.element.qp = &qp->ibqp; - err_event.device = &dev->ibdev; - ib_dispatch_event(&err_event); - } - } - } - mutex_unlock(&dev->dev_lock); err_event.event = IB_EVENT_PORT_ERR; err_event.element.port_num = 1; @@ -433,7 +408,7 @@ static int ocrdma_close(struct ocrdma_dev *dev) static void ocrdma_shutdown(struct ocrdma_dev *dev) { - ocrdma_close(dev); + ocrdma_dispatch_port_error(dev); ocrdma_remove(dev); } @@ -444,16 +419,26 @@ static void ocrdma_shutdown(struct ocrdma_dev *dev) static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event) { switch (event) { - case BE_DEV_UP: - ocrdma_open(dev); - break; - case BE_DEV_DOWN: - ocrdma_close(dev); - break; case BE_DEV_SHUTDOWN: ocrdma_shutdown(dev); break; + default: + break; + } +} + +void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate) +{ + if (!(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)) { + dev->flags |= OCRDMA_FLAGS_LINK_STATUS_INIT; + if (!lstate) + return; } + + if (!lstate) + ocrdma_dispatch_port_error(dev); + else + ocrdma_dispatch_port_active(dev); } static struct ocrdma_driver ocrdma_drv = { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6a38268..99dd6fd 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -465,8 +465,11 @@ struct ocrdma_ae_qp_mcqe { u32 valid_ae_event; }; -#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 -#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 +enum ocrdma_async_event_code { + OCRDMA_ASYNC_LINK_EVE_CODE = 0x01, + OCRDMA_ASYNC_GRP5_EVE_CODE = 0x05, + OCRDMA_ASYNC_RDMA_EVE_CODE = 0x14 +}; enum ocrdma_async_grp5_events { OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01, @@ -489,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE { OCRDMA_MAX_ASYNC_ERRORS }; +struct ocrdma_ae_lnkst_mcqe { + u32 speed_state_ptn; + u32 qos_reason_falut; + u32 evt_tag; + u32 valid_ae_event; +}; + +enum { + OCRDMA_AE_LSC_PORT_NUM_MASK = 0x3F, + OCRDMA_AE_LSC_PT_SHIFT = 0x06, + OCRDMA_AE_LSC_PT_MASK = (0x03 << + OCRDMA_AE_LSC_PT_SHIFT), + OCRDMA_AE_LSC_LS_SHIFT = 0x08, + OCRDMA_AE_LSC_LS_MASK = (0xFF << + OCRDMA_AE_LSC_LS_SHIFT), + OCRDMA_AE_LSC_LD_SHIFT = 0x10, + OCRDMA_AE_LSC_LD_MASK = (0xFF << + OCRDMA_AE_LSC_LD_SHIFT), + OCRDMA_AE_LSC_PPS_SHIFT = 0x18, + OCRDMA_AE_LSC_PPS_MASK = (0xFF << + OCRDMA_AE_LSC_PPS_SHIFT), + OCRDMA_AE_LSC_PPF_MASK = 0xFF, + OCRDMA_AE_LSC_ER_SHIFT = 0x08, + OCRDMA_AE_LSC_ER_MASK = (0xFF << + OCRDMA_AE_LSC_ER_SHIFT), + OCRDMA_AE_LSC_QOS_SHIFT = 0x10, + OCRDMA_AE_LSC_QOS_MASK = (0xFFFF << + OCRDMA_AE_LSC_QOS_SHIFT) +}; + +enum { + OCRDMA_AE_LSC_PLINK_DOWN = 0x00, + OCRDMA_AE_LSC_PLINK_UP = 0x01, + OCRDMA_AE_LSC_LLINK_DOWN = 0x02, + OCRDMA_AE_LSC_LLINK_MASK = 0x02, + OCRDMA_AE_LSC_LLINK_UP = 0x03 +}; + /* mailbox command request and responses */ enum { OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2, @@ -676,7 +717,7 @@ enum { OCRDMA_PHY_PFLT_SHIFT = 0x18, OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000, OCRDMA_QOS_LNKSP_SHIFT = 0x10, - OCRDMA_LLST_MASK = 0xFF, + OCRDMA_LINK_ST_MASK = 0x01, OCRDMA_PLFC_MASK = 0x00000400, OCRDMA_PLFC_SHIFT = 0x8, OCRDMA_PLRFC_MASK = 0x00000200, @@ -691,7 +732,7 @@ struct ocrdma_get_link_speed_rsp { u32 pflt_pps_ld_pnum; u32 qos_lsp; - u32 res_lls; + u32 res_lnk_st; }; enum { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 69334e2..255f774 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -64,10 +64,11 @@ static int ocrdma_add_stat(char *start, char *pcur, return cpy_len; } -static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev) +bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev) { struct stats_mem *mem = &dev->stats_mem; + mutex_init(&dev->stats_lock); /* Alloc mbox command mem*/ mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req), sizeof(struct ocrdma_rdma_stats_resp)); @@ -91,13 +92,14 @@ static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev) return true; } -static void ocrdma_release_stats_mem(struct ocrdma_dev *dev) +void ocrdma_release_stats_resources(struct ocrdma_dev *dev) { struct stats_mem *mem = &dev->stats_mem; if (mem->va) dma_free_coherent(&dev->nic_info.pdev->dev, mem->size, mem->va, mem->pa); + mem->va = NULL; kfree(mem->debugfs_mem); } @@ -838,15 +840,9 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev) &dev->reset_stats, &ocrdma_dbg_ops)) goto err; - /* Now create dma_mem for stats mbx command */ - if (!ocrdma_alloc_stats_mem(dev)) - goto err; - - mutex_init(&dev->stats_lock); return; err: - ocrdma_release_stats_mem(dev); debugfs_remove_recursive(dev->dir); dev->dir = NULL; } @@ -855,9 +851,7 @@ void ocrdma_rem_port_stats(struct ocrdma_dev *dev) { if (!dev->dir) return; - mutex_destroy(&dev->stats_lock); - ocrdma_release_stats_mem(dev); - debugfs_remove(dev->dir); + debugfs_remove_recursive(dev->dir); } void ocrdma_init_debugfs(void) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h index c9e58d0..bba1fec 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -65,6 +65,8 @@ enum OCRDMA_STATS_TYPE { void ocrdma_rem_debugfs(void); void ocrdma_init_debugfs(void); +bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev); +void ocrdma_release_stats_resources(struct ocrdma_dev *dev); void ocrdma_rem_port_stats(struct ocrdma_dev *dev); void ocrdma_add_port_stats(struct ocrdma_dev *dev); int ocrdma_pma_counters(struct ocrdma_dev *dev, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 1f3affb..12420e4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -73,7 +73,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, if (index >= OCRDMA_MAX_SGID) return -EINVAL; - ret = ib_get_cached_gid(ibdev, port, index, sgid); + ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL); if (ret == -EAGAIN) { memcpy(sgid, &zgid, sizeof(*sgid)); return 0; @@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge); - attr->max_sge_rd = 0; + attr->max_sge = dev->attr.max_send_sge; + attr->max_sge_rd = attr->max_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; @@ -171,7 +171,7 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev, int status; u8 speed; - status = ocrdma_mbx_get_link_speed(dev, &speed); + status = ocrdma_mbx_get_link_speed(dev, &speed, NULL); if (status) speed = OCRDMA_PHYS_LINK_SPEED_ZERO; @@ -1013,6 +1013,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); + kfree(mr->pages); ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); /* it could be user registered memory. */ @@ -1093,7 +1094,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, spin_lock_init(&cq->comp_handler_lock); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); - cq->first_arm = true; if (ib_ctx) { uctx = get_ocrdma_ucontext(ib_ctx); @@ -1997,13 +1997,13 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, { struct ocrdma_ewqe_ud_hdr *ud_hdr = (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); - struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah); + struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah); - ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn; + ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn; if (qp->qp_type == IB_QPT_GSI) ud_hdr->qkey = qp->qkey; else - ud_hdr->qkey = wr->wr.ud.remote_qkey; + ud_hdr->qkey = ud_wr(wr)->remote_qkey; ud_hdr->rsvd_ahid = ah->id; if (ah->av->valid & OCRDMA_AV_VLAN_VALID) hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); @@ -2106,9 +2106,9 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); if (status) return status; - ext_rw->addr_lo = wr->wr.rdma.remote_addr; - ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); - ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->addr_lo = rdma_wr(wr)->remote_addr; + ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr); + ext_rw->lrkey = rdma_wr(wr)->rkey; ext_rw->len = hdr->total_len; return 0; } @@ -2126,46 +2126,12 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT); hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); - ext_rw->addr_lo = wr->wr.rdma.remote_addr; - ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); - ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->addr_lo = rdma_wr(wr)->remote_addr; + ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr); + ext_rw->lrkey = rdma_wr(wr)->rkey; ext_rw->len = hdr->total_len; } -static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl, - struct ocrdma_hw_mr *hwmr) -{ - int i; - u64 buf_addr = 0; - int num_pbes; - struct ocrdma_pbe *pbe; - - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - num_pbes = 0; - - /* go through the OS phy regions & fill hw pbe entries into pbls. */ - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - /* number of pbes can be more for one OS buf, when - * buffers are of different sizes. - * split the ib_buf to one or more pbes. - */ - buf_addr = wr->wr.fast_reg.page_list->page_list[i]; - pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); - pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); - num_pbes += 1; - pbe++; - - /* if the pbl is full storing the pbes, - * move to next pbl. - */ - if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - } - } - return; -} - static int get_encoded_page_size(int pg_sz) { /* Max size is 256M 4096 << 16 */ @@ -2176,48 +2142,59 @@ static int get_encoded_page_size(int pg_sz) return i; } - -static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) +static int ocrdma_build_reg(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr, + struct ib_reg_wr *wr) { u64 fbo; struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); - struct ocrdma_mr *mr; - struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); + struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr); + struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; + struct ocrdma_pbe *pbe; u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr); + int num_pbes = 0, i; wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); - if (wr->wr.fast_reg.page_list_len > dev->attr.max_pages_per_frmr) - return -EINVAL; - hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); - if (wr->wr.fast_reg.page_list_len == 0) - BUG(); - if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE) + if (wr->access & IB_ACCESS_LOCAL_WRITE) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; - if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE) + if (wr->access & IB_ACCESS_REMOTE_WRITE) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; - if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ) + if (wr->access & IB_ACCESS_REMOTE_READ) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; - hdr->lkey = wr->wr.fast_reg.rkey; - hdr->total_len = wr->wr.fast_reg.length; + hdr->lkey = wr->key; + hdr->total_len = mr->ibmr.length; - fbo = wr->wr.fast_reg.iova_start - - (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK); + fbo = mr->ibmr.iova - mr->pages[0]; - fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start); - fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff); + fast_reg->va_hi = upper_32_bits(mr->ibmr.iova); + fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff); fast_reg->fbo_hi = upper_32_bits(fbo); fast_reg->fbo_lo = (u32) fbo & 0xffffffff; - fast_reg->num_sges = wr->wr.fast_reg.page_list_len; - fast_reg->size_sge = - get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); - mr = (struct ocrdma_mr *) (unsigned long) - dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; - build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); + fast_reg->num_sges = mr->npages; + fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size); + + pbe = pbl_tbl->va; + for (i = 0; i < mr->npages; i++) { + u64 buf_addr = mr->pages[i]; + + pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); + num_pbes += 1; + pbe++; + + /* if the pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + } + } + return 0; } @@ -2300,8 +2277,8 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT; hdr->lkey = wr->ex.invalidate_rkey; break; - case IB_WR_FAST_REG_MR: - status = ocrdma_build_fr(qp, hdr, wr); + case IB_WR_REG_MR: + status = ocrdma_build_reg(qp, hdr, reg_wr(wr)); break; default: status = -EINVAL; @@ -2567,7 +2544,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc, ibwc->opcode = IB_WC_SEND; break; case OCRDMA_FR_MR: - ibwc->opcode = IB_WC_FAST_REG_MR; + ibwc->opcode = IB_WC_REG_MR; break; case OCRDMA_LKEY_INV: ibwc->opcode = IB_WC_LOCAL_INV; @@ -2748,8 +2725,7 @@ static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe) OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_SRCQP_MASK; - ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) & - OCRDMA_CQE_PKEY_MASK; + ibwc->pkey_index = 0; ibwc->wc_flags = IB_WC_GRH; ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >> OCRDMA_CQE_UD_XFER_LEN_SHIFT); @@ -2933,17 +2909,9 @@ expand_cqe: } stop_cqe: cq->getp = cur_getp; - if (cq->deferred_arm) { - ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, - polled_hw_cqes); - cq->deferred_arm = false; - cq->deferred_sol = false; - } else { - /* We need to pop the CQE. No need to arm */ - ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol, - polled_hw_cqes); - cq->deferred_sol = false; - } + + if (polled_hw_cqes) + ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes); return i; } @@ -3027,13 +2995,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) if (cq_flags & IB_CQ_SOLICITED) sol_needed = true; - if (cq->first_arm) { - ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); - cq->first_arm = false; - } - - cq->deferred_arm = true; - cq->deferred_sol = sol_needed; + ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); spin_unlock_irqrestore(&cq->cq_lock, flags); return 0; @@ -3058,6 +3020,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, if (!mr) return ERR_PTR(-ENOMEM); + mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mr->pages) { + status = -ENOMEM; + goto pl_err; + } + status = ocrdma_get_pbl_info(dev, mr, max_num_sg); if (status) goto pbl_err; @@ -3081,189 +3049,31 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); pbl_err: + kfree(mr->pages); +pl_err: kfree(mr); return ERR_PTR(-ENOMEM); } -struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device - *ibdev, - int page_list_len) -{ - struct ib_fast_reg_page_list *frmr_list; - int size; - - size = sizeof(*frmr_list) + (page_list_len * sizeof(u64)); - frmr_list = kzalloc(size, GFP_KERNEL); - if (!frmr_list) - return ERR_PTR(-ENOMEM); - frmr_list->page_list = (u64 *)(frmr_list + 1); - return frmr_list; -} - -void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) -{ - kfree(page_list); -} - -#define MAX_KERNEL_PBE_SIZE 65536 -static inline int count_kernel_pbes(struct ib_phys_buf *buf_list, - int buf_cnt, u32 *pbe_size) +static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr) { - u64 total_size = 0; - u64 buf_size = 0; - int i; - *pbe_size = roundup(buf_list[0].size, PAGE_SIZE); - *pbe_size = roundup_pow_of_two(*pbe_size); - - /* find the smallest PBE size that we can have */ - for (i = 0; i < buf_cnt; i++) { - /* first addr may not be page aligned, so ignore checking */ - if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) || - (buf_list[i].size & ~PAGE_MASK))) { - return 0; - } - - /* if configured PBE size is greater then the chosen one, - * reduce the PBE size. - */ - buf_size = roundup(buf_list[i].size, PAGE_SIZE); - /* pbe_size has to be even multiple of 4K 1,2,4,8...*/ - buf_size = roundup_pow_of_two(buf_size); - if (*pbe_size > buf_size) - *pbe_size = buf_size; - - total_size += buf_size; - } - *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ? - (MAX_KERNEL_PBE_SIZE) : (*pbe_size); + struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); - /* num_pbes = total_size / (*pbe_size); this is implemented below. */ - - return total_size >> ilog2(*pbe_size); -} - -static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt, - u32 pbe_size, struct ocrdma_pbl *pbl_tbl, - struct ocrdma_hw_mr *hwmr) -{ - int i; - int idx; - int pbes_per_buf = 0; - u64 buf_addr = 0; - int num_pbes; - struct ocrdma_pbe *pbe; - int total_num_pbes = 0; + if (unlikely(mr->npages == mr->hwmr.num_pbes)) + return -ENOMEM; - if (!hwmr->num_pbes) - return; + mr->pages[mr->npages++] = addr; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - num_pbes = 0; - - /* go through the OS phy regions & fill hw pbe entries into pbls. */ - for (i = 0; i < ib_buf_cnt; i++) { - buf_addr = buf_list[i].addr; - pbes_per_buf = - roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) / - pbe_size; - hwmr->len += buf_list[i].size; - /* number of pbes can be more for one OS buf, when - * buffers are of different sizes. - * split the ib_buf to one or more pbes. - */ - for (idx = 0; idx < pbes_per_buf; idx++) { - /* we program always page aligned addresses, - * first unaligned address is taken care by fbo. - */ - if (i == 0) { - /* for non zero fbo, assign the - * start of the page. - */ - pbe->pa_lo = - cpu_to_le32((u32) (buf_addr & PAGE_MASK)); - pbe->pa_hi = - cpu_to_le32((u32) upper_32_bits(buf_addr)); - } else { - pbe->pa_lo = - cpu_to_le32((u32) (buf_addr & 0xffffffff)); - pbe->pa_hi = - cpu_to_le32((u32) upper_32_bits(buf_addr)); - } - buf_addr += pbe_size; - num_pbes += 1; - total_num_pbes += 1; - pbe++; - - if (total_num_pbes == hwmr->num_pbes) - goto mr_tbl_done; - /* if the pbl is full storing the pbes, - * move to next pbl. - */ - if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - num_pbes = 0; - } - } - } -mr_tbl_done: - return; + return 0; } -struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd, - struct ib_phys_buf *buf_list, - int buf_cnt, int acc, u64 *iova_start) +int ocrdma_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - int status = -ENOMEM; - struct ocrdma_mr *mr; - struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - u32 num_pbes; - u32 pbe_size = 0; + struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); - if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE)) - return ERR_PTR(-EINVAL); + mr->npages = 0; - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(status); - - num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size); - if (num_pbes == 0) { - status = -EINVAL; - goto pbl_err; - } - status = ocrdma_get_pbl_info(dev, mr, num_pbes); - if (status) - goto pbl_err; - - mr->hwmr.pbe_size = pbe_size; - mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK); - mr->hwmr.va = *iova_start; - mr->hwmr.local_rd = 1; - mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; - mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; - mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; - mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; - mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0; - - status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); - if (status) - goto pbl_err; - build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table, - &mr->hwmr); - status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); - if (status) - goto mbx_err; - - mr->ibmr.lkey = mr->hwmr.lkey; - if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) - mr->ibmr.rkey = mr->hwmr.lkey; - return &mr->ibmr; - -mbx_err: - ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); -pbl_err: - kfree(mr); - return ERR_PTR(status); + return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 308c168..8b517fd 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -117,17 +117,13 @@ int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *, int ocrdma_dereg_mr(struct ib_mr *); struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); -struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start); struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, u64 virt, int acc, struct ib_udata *); struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); -struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device - *ibdev, - int page_list_len); -void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list); +int ocrdma_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); #endif /* __OCRDMA_VERBS_H__ */ diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 13ef22b..fcdf3791 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode, { int error; - mutex_lock(&d_inode(parent)->i_mutex); + inode_lock(d_inode(parent)); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(*dentry)) error = qibfs_mknod(d_inode(parent), *dentry, mode, fops, data); else error = PTR_ERR(*dentry); - mutex_unlock(&d_inode(parent)->i_mutex); + inode_unlock(d_inode(parent)); return error; } @@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb, int ret, i; root = dget(sb->s_root); - mutex_lock(&d_inode(root)->i_mutex); + inode_lock(d_inode(root)); snprintf(unit, sizeof(unit), "%u", dd->unit); dir = lookup_one_len(unit, root, strlen(unit)); @@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb, goto bail; } - mutex_lock(&d_inode(dir)->i_mutex); + inode_lock(d_inode(dir)); remove_file(dir, "counters"); remove_file(dir, "counter_names"); remove_file(dir, "portcounter_names"); @@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb, } } remove_file(dir, "flash"); - mutex_unlock(&d_inode(dir)->i_mutex); + inode_unlock(d_inode(dir)); ret = simple_rmdir(d_inode(root), dir); d_delete(dir); dput(dir); bail: - mutex_unlock(&d_inode(root)->i_mutex); + inode_unlock(d_inode(root)); dput(root); return ret; } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 7e00470..4ff340f 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1680,7 +1680,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; egrcnt = rcd->rcvegrcnt; egroff = rcd->rcvegr_tid_base; diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 5afaa21..d725c56 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -336,14 +336,15 @@ bail: } /* - * Initialize the memory region specified by the work reqeust. + * Initialize the memory region specified by the work request. */ -int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) +int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr) { struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; struct qib_pd *pd = to_ipd(qp->ibqp.pd); - struct qib_mregion *mr; - u32 rkey = wr->wr.fast_reg.rkey; + struct qib_mr *mr = to_imr(wr->mr); + struct qib_mregion *mrg; + u32 key = wr->key; unsigned i, n, m; int ret = -EINVAL; unsigned long flags; @@ -351,33 +352,33 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) size_t ps; spin_lock_irqsave(&rkt->lock, flags); - if (pd->user || rkey == 0) + if (pd->user || key == 0) goto bail; - mr = rcu_dereference_protected( - rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], + mrg = rcu_dereference_protected( + rkt->table[(key >> (32 - ib_qib_lkey_table_size))], lockdep_is_held(&rkt->lock)); - if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) + if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd)) goto bail; - if (wr->wr.fast_reg.page_list_len > mr->max_segs) + if (mr->npages > mrg->max_segs) goto bail; - ps = 1UL << wr->wr.fast_reg.page_shift; - if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) + ps = mr->ibmr.page_size; + if (mr->ibmr.length > ps * mr->npages) goto bail; - mr->user_base = wr->wr.fast_reg.iova_start; - mr->iova = wr->wr.fast_reg.iova_start; - mr->lkey = rkey; - mr->length = wr->wr.fast_reg.length; - mr->access_flags = wr->wr.fast_reg.access_flags; - page_list = wr->wr.fast_reg.page_list->page_list; + mrg->user_base = mr->ibmr.iova; + mrg->iova = mr->ibmr.iova; + mrg->lkey = key; + mrg->length = mr->ibmr.length; + mrg->access_flags = wr->access; + page_list = mr->pages; m = 0; n = 0; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - mr->map[m]->segs[n].vaddr = (void *) page_list[i]; - mr->map[m]->segs[n].length = ps; + for (i = 0; i < mr->npages; i++) { + mrg->map[m]->segs[n].vaddr = (void *) page_list[i]; + mrg->map[m]->segs[n].length = ps; if (++n == QIB_SEGSZ) { m++; n = 0; diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 19220dc..5f53304 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -150,10 +150,7 @@ static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) rval = init_qib_mregion(&mr->mr, pd, count); if (rval) goto bail; - /* - * ib_reg_phys_mr() will initialize mr->ibmr except for - * lkey and rkey. - */ + rval = qib_alloc_lkey(&mr->mr, 0); if (rval) goto bail_mregion; @@ -171,52 +168,6 @@ bail: } /** - * qib_reg_phys_mr - register a physical memory region - * @pd: protection domain for this memory region - * @buffer_list: pointer to the list of physical buffers to register - * @num_phys_buf: the number of physical buffers to register - * @iova_start: the starting address passed over IB which maps to this MR - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start) -{ - struct qib_mr *mr; - int n, m, i; - struct ib_mr *ret; - - mr = alloc_mr(num_phys_buf, pd); - if (IS_ERR(mr)) { - ret = (struct ib_mr *)mr; - goto bail; - } - - mr->mr.user_base = *iova_start; - mr->mr.iova = *iova_start; - mr->mr.access_flags = acc; - - m = 0; - n = 0; - for (i = 0; i < num_phys_buf; i++) { - mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr; - mr->mr.map[m]->segs[n].length = buffer_list[i].size; - mr->mr.length += buffer_list[i].size; - n++; - if (n == QIB_SEGSZ) { - m++; - n = 0; - } - } - - ret = &mr->ibmr; - -bail: - return ret; -} - -/** * qib_reg_user_mr - register a userspace memory region * @pd: protection domain for this memory region * @start: starting userspace address @@ -303,6 +254,7 @@ int qib_dereg_mr(struct ib_mr *ibmr) int ret = 0; unsigned long timeout; + kfree(mr->pages); qib_free_lkey(&mr->mr); qib_put_mr(&mr->mr); /* will set completion if last */ @@ -323,7 +275,7 @@ out: /* * Allocate a memory region usable with the - * IB_WR_FAST_REG_MR send work request. + * IB_WR_REG_MR send work request. * * Return the memory region on success, otherwise return an errno. */ @@ -340,37 +292,38 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd, if (IS_ERR(mr)) return (struct ib_mr *)mr; + mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mr->pages) + goto err; + return &mr->ibmr; + +err: + qib_dereg_mr(&mr->ibmr); + return ERR_PTR(-ENOMEM); } -struct ib_fast_reg_page_list * -qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) +static int qib_set_page(struct ib_mr *ibmr, u64 addr) { - unsigned size = page_list_len * sizeof(u64); - struct ib_fast_reg_page_list *pl; - - if (size > PAGE_SIZE) - return ERR_PTR(-EINVAL); - - pl = kzalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) - return ERR_PTR(-ENOMEM); + struct qib_mr *mr = to_imr(ibmr); - pl->page_list = kzalloc(size, GFP_KERNEL); - if (!pl->page_list) - goto err_free; + if (unlikely(mr->npages == mr->mr.max_segs)) + return -ENOMEM; - return pl; + mr->pages[mr->npages++] = addr; -err_free: - kfree(pl); - return ERR_PTR(-ENOMEM); + return 0; } -void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl) +int qib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - kfree(pl->page_list); - kfree(pl); + struct qib_mr *mr = to_imr(ibmr); + + mr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page); } /** diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 4fa88ba..3eff35c 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -100,9 +100,10 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) +static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map, + gfp_t gfp) { - unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long page = get_zeroed_page(gfp); /* * Free the page if someone raced with us installing it. @@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, - enum ib_qp_type type, u8 port) + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct qpn_map *map; @@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map); + get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } @@ -436,7 +437,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; } @@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, size_t sz; size_t sg_list_sz; struct ib_qp *ret; + gfp_t gfp; + if (init_attr->cap.max_send_sge > ib_qib_max_sges || init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || - init_attr->create_flags) { - ret = ERR_PTR(-EINVAL); - goto bail; - } + init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) + return ERR_PTR(-EINVAL); + + /* GFP_NOIO is applicable in RC QPs only */ + if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && + init_attr->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? + GFP_NOIO : GFP_KERNEL; /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { @@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, sz = sizeof(struct qib_sge) * init_attr->cap.max_send_sge + sizeof(struct qib_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz, + gfp, PAGE_KERNEL); if (swq == NULL) { ret = ERR_PTR(-ENOMEM); goto bail; @@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); - qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp); if (!qp->s_hdr) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct qib_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + - qp->r_rq.size * sz); + if (gfp != GFP_NOIO) + qp->r_rq.wq = vmalloc_user( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz); + else + qp->r_rq.wq = __vmalloc( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz, + gfp, PAGE_KERNEL); + if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, dev = to_idev(ibpd->device); dd = dd_from_dev(dev); err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c index 5e27f76..4c7c3c8 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.c +++ b/drivers/infiniband/hw/qib/qib_qsfp.c @@ -292,7 +292,7 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp) qib_dev_porterr(ppd->dd, ppd->port, "QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]); - if ((peek[2] & 2) == 0) { + if ((peek[2] & 4) == 0) { /* * If cable is paged, rather than "flat memory", we need to * set the page to zero, Even if it already appears to be zero. @@ -538,7 +538,7 @@ int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len) sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n", QSFP_DATE_LEN, cd.date); sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n", - QSFP_LOT_LEN, cd.date); + QSFP_LOT_LEN, cd.lot); while (bidx < QSFP_DEFAULT_HDR_CNT) { int iidx; diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 4544d6f..e6b7556 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -373,10 +373,11 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; goto bail; } + ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; @@ -386,15 +387,15 @@ int qib_make_rc_req(struct qib_qp *qp) len = pmtu; break; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE) qp->s_state = OP(RDMA_WRITE_ONLY); else { - qp->s_state = - OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); /* Immediate data comes after RETH */ - ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + ohdr->u.rc.imm_data = + wqe->rdma_wr.wr.ex.imm_data; hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) + if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; } bth2 |= IB_BTH_REQ_ACK; @@ -424,10 +425,11 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_next_psn += (len - 1) / pmtu; wqe->lpsn = qp->s_next_psn++; } + ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); @@ -455,24 +457,24 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_lsn++; wqe->lpsn = wqe->psn; } - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); + wqe->atomic_wr.swap); ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); } else { qp->s_state = OP(FETCH_ADD); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); ohdr->u.atomic_eth.compare_data = 0; } ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr >> 32); + wqe->atomic_wr.remote_addr >> 32); ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr); + wqe->atomic_wr.remote_addr); ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); + wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; @@ -597,9 +599,9 @@ int qib_make_rc_req(struct qib_qp *qp) */ len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu; ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + cpu_to_be64(wqe->rdma_wr.remote_addr + len); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 22e356c..b1aa21b 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -459,8 +459,8 @@ again: if (wqe->length == 0) break; if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_WRITE))) goto acc_err; qp->r_sge.sg_list = NULL; @@ -472,8 +472,8 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; release = 0; @@ -490,18 +490,18 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), - wqe->wr.wr.atomic.remote_addr, - wqe->wr.wr.atomic.rkey, + wqe->atomic_wr.remote_addr, + wqe->atomic_wr.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = wqe->wr.wr.atomic.compare_add; + sdata = wqe->atomic_wr.compare_add; *(u64 *) sqp->s_sge.sge.vaddr = - (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? + (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); + sdata, wqe->atomic_wr.swap); qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -785,7 +785,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index aa3a803..06a5645 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -129,9 +129,9 @@ int qib_make_uc_req(struct qib_qp *qp) case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / 4; if (len > pmtu) { diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 26243b7..59193f6 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -59,7 +59,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) u32 length; enum ib_qp_type sqptype, dqptype; - qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); + qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->n_pkt_drops++; return; @@ -76,7 +76,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) goto drop; } - ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(swqe->ud_wr.ah)->attr; ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -106,8 +106,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) if (qp->ibqp.qp_num) { u32 qkey; - qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? - sqp->qkey : swqe->wr.wr.ud.remote_qkey; + qkey = (int)swqe->ud_wr.remote_qkey < 0 ? + sqp->qkey : swqe->ud_wr.remote_qkey; if (unlikely(qkey != qp->qkey)) { u16 lid; @@ -210,7 +210,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? - swqe->wr.wr.ud.pkey_index : 0; + swqe->ud_wr.pkey_index : 0; wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); wc.sl = ah_attr->sl; wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); @@ -277,7 +277,7 @@ int qib_make_ud_req(struct qib_qp *qp) /* Construct the header. */ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(wqe->ud_wr.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) this_cpu_inc(ibp->pmastats->n_multicast_xmit); @@ -363,7 +363,7 @@ int qib_make_ud_req(struct qib_qp *qp) bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY : qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ? - wqe->wr.wr.ud.pkey_index : qp->s_pkey_index); + wqe->ud_wr.pkey_index : qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); /* * Use the multicast QP if the destination LID is a multicast LID. @@ -371,14 +371,14 @@ int qib_make_ud_req(struct qib_qp *qp) ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE && ah_attr->dlid != QIB_PERMISSIVE_LID ? cpu_to_be32(QIB_MULTICAST_QPN) : - cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + cpu_to_be32(wqe->ud_wr.remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? - qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? + qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 3dcc498..baf1e42 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, unsigned long flags; struct qib_lkey_table *rkt; struct qib_pd *pd; + int avoid_schedule = 0; spin_lock_irqsave(&qp->s_lock, flags); @@ -362,8 +363,8 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, * undefined operations. * Make sure buffer is large enough to hold the result for atomics. */ - if (wr->opcode == IB_WR_FAST_REG_MR) { - if (qib_fast_reg_mr(qp, wr)) + if (wr->opcode == IB_WR_REG_MR) { + if (qib_reg_mr(qp, reg_wr(wr))) goto bail_inval; } else if (qp->ibqp.qp_type == IB_QPT_UC) { if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) @@ -374,7 +375,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, wr->opcode != IB_WR_SEND_WITH_IMM) goto bail_inval; /* Check UD destination address PD */ - if (qp->ibqp.pd != wr->wr.ud.ah->pd) + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) goto bail_inval; } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) goto bail_inval; @@ -397,7 +398,23 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, rkt = &to_idev(qp->ibqp.device)->lk_table; pd = to_ipd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_REG_MR) + memcpy(&wqe->reg_wr, reg_wr(wr), + sizeof(wqe->reg_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + wqe->length = 0; j = 0; if (wr->num_sge) { @@ -422,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, qp->ibqp.qp_type == IB_QPT_RC) { if (wqe->length > 0x80000000U) goto bail_inval_free; + if (wqe->length <= qp->pmtu) + avoid_schedule = 1; } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport + - qp->port_num - 1)->ibmtu) + qp->port_num - 1)->ibmtu) { goto bail_inval_free; - else - atomic_inc(&to_iah(wr->wr.ud.ah)->refcount); + } else { + atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount); + avoid_schedule = 1; + } wqe->ssn = qp->s_ssn++; qp->s_head = next; @@ -442,7 +463,7 @@ bail_inval_free: bail_inval: ret = -EINVAL; bail: - if (!ret && !wr->next && + if (!ret && !wr->next && !avoid_schedule && !qib_sdma_empty( dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) { qib_schedule_send(qp); @@ -2240,12 +2261,10 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->poll_cq = qib_poll_cq; ibdev->req_notify_cq = qib_req_notify_cq; ibdev->get_dma_mr = qib_get_dma_mr; - ibdev->reg_phys_mr = qib_reg_phys_mr; ibdev->reg_user_mr = qib_reg_user_mr; ibdev->dereg_mr = qib_dereg_mr; ibdev->alloc_mr = qib_alloc_mr; - ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list; - ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list; + ibdev->map_mr_sg = qib_map_mr_sg; ibdev->alloc_fmr = qib_alloc_fmr; ibdev->map_phys_fmr = qib_map_phys_fmr; ibdev->unmap_fmr = qib_unmap_fmr; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a08df70..6c5e777 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -329,6 +329,8 @@ struct qib_sge { struct qib_mr { struct ib_mr ibmr; struct ib_umem *umem; + u64 *pages; + u32 npages; struct qib_mregion mr; /* must be last */ }; @@ -338,7 +340,13 @@ struct qib_mr { * in qp->s_max_sge. */ struct qib_swqe { - struct ib_send_wr wr; /* don't use wr.sg_list */ + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; u32 psn; /* first packet sequence number */ u32 lpsn; /* last packet sequence number */ u32 ssn; /* send sequence number */ @@ -1024,10 +1032,6 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc); -struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start); - struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); @@ -1038,12 +1042,11 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_entries); -struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list( - struct ib_device *ibdev, int page_list_len); - -void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); +int qib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); -int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr); +int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr); struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c index f8ea069..b2fb528 100644 --- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct qib_mcast *mcast = NULL; - struct qib_mcast_qp *p, *tmp; + struct qib_mcast_qp *p, *tmp, *delp = NULL; struct rb_node *n; int last = 0; int ret; - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { - ret = -EINVAL; - goto bail; - } + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; spin_lock_irq(&ibp->lock); @@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) while (1) { if (n == NULL) { spin_unlock_irq(&ibp->lock); - ret = -EINVAL; - goto bail; + return -EINVAL; } mcast = rb_entry(n, struct qib_mcast, rb_node); @@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) */ list_del_rcu(&p->list); mcast->n_attached--; + delp = p; /* If this was the last attached QP, remove the GID too. */ if (list_empty(&mcast->qp_list)) { @@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } spin_unlock_irq(&ibp->lock); + /* QP not attached */ + if (!delp) + return -EINVAL; + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + qib_mcast_qp_free(delp); - if (p) { - /* - * Wait for any list walkers to finish before freeing the - * list element. - */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - qib_mcast_qp_free(p); - } if (last) { atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); @@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) dev->n_mcast_grps_allocated--; spin_unlock_irq(&dev->n_mcast_grps_lock); } - - ret = 0; - -bail: - return ret; + return 0; } int qib_mcast_tree_empty(struct qib_ibport *ibp) diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c index 5e55b8b..92dc66c 100644 --- a/drivers/infiniband/hw/usnic/usnic_debugfs.c +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -157,8 +157,9 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) qp_flow, &flowinfo_ops); if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) { - usnic_err("Failed to create dbg fs entry for flow %u\n", - qp_flow->flow->flow_id); + usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n", + qp_flow->flow->flow_id, + PTR_ERR(qp_flow->dbgfs_dentry)); } } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 0c15bd8..565c881 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -343,16 +343,15 @@ static void *usnic_ib_device_add(struct pci_dev *dev) netdev = pci_get_drvdata(dev); us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); - if (IS_ERR_OR_NULL(us_ibdev)) { + if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT); + return ERR_PTR(-EFAULT); } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); - if (IS_ERR_OR_NULL(us_ibdev->ufdev)) { - usnic_err("Failed to alloc ufdev for %s with err %ld\n", - pci_name(dev), PTR_ERR(us_ibdev->ufdev)); + if (!us_ibdev->ufdev) { + usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev)); goto err_dealloc; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 704d087..5b0248a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -236,8 +236,8 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, /* Create Flow Handle */ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); - if (IS_ERR_OR_NULL(qp_flow)) { - err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; + if (!qp_flow) { + err = -ENOMEM; goto out_dealloc_flow; } qp_flow->flow = flow; @@ -311,8 +311,8 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp, /* Create qp_flow */ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); - if (IS_ERR_OR_NULL(qp_flow)) { - err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; + if (!qp_flow) { + err = -ENOMEM; goto out_dealloc_flow; } qp_flow->flow = flow; @@ -521,7 +521,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, if (!status) { qp_grp->state = new_state; - usnic_info("Transistioned %u from %s to %s", + usnic_info("Transitioned %u from %s to %s", qp_grp->grp_id, usnic_ib_qp_grp_state_to_string(old_state), usnic_ib_qp_grp_state_to_string(new_state)); @@ -575,7 +575,7 @@ alloc_res_chunk_list(struct usnic_vnic *vnic, return res_chunk_list; out_free_res: - for (i--; i > 0; i--) + for (i--; i >= 0; i--) usnic_vnic_put_resources(res_chunk_list[i]); kfree(res_chunk_list); return ERR_PTR(err); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index f8e3211..6cdb4d2 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -51,7 +51,7 @@ static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver) { - *fw_ver = (u64) *fw_ver_str; + *fw_ver = *((u64 *)fw_ver_str); } static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, @@ -571,20 +571,20 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp_grp = to_uqp_grp(ibqp); - /* TODO: Future Support All States */ mutex_lock(&qp_grp->vf->pf->usdev_lock); - if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) { - status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL); - } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) { - status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL); - } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) { - status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL); + if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) { + /* usnic devices only have one port */ + status = -EINVAL; + goto out_unlock; + } + if (attr_mask & IB_QP_STATE) { + status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL); } else { - usnic_err("Unexpected combination mask: %u state: %u\n", - attr_mask & IB_QP_STATE, attr->qp_state); + usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask); status = -EINVAL; } +out_unlock: mutex_unlock(&qp_grp->vf->pf->usdev_lock); return status; } @@ -625,8 +625,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, virt_addr, length); mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (IS_ERR_OR_NULL(mr)) - return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM); + if (!mr) + return ERR_PTR(-ENOMEM); mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length, access_flags, 0); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 414eaa5..0d9d2e6a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -43,8 +43,6 @@ int usnic_ib_query_device(struct ib_device *ibdev, struct ib_udata *uhw); int usnic_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); -enum rdma_protocol_type -usnic_ib_query_protocol(struct ib_device *device, u8 port_num); int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c index 66de93f..8875107 100644 --- a/drivers/infiniband/hw/usnic/usnic_vnic.c +++ b/drivers/infiniband/hw/usnic/usnic_vnic.c @@ -237,7 +237,7 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type, struct usnic_vnic_res *res; int i; - if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner) + if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner) return ERR_PTR(-EINVAL); ret = kzalloc(sizeof(*ret), GFP_ATOMIC); @@ -247,26 +247,28 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type, return ERR_PTR(-ENOMEM); } - ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC); - if (!ret->res) { - usnic_err("Failed to allocate resources for %s. Out of memory\n", - usnic_vnic_pci_name(vnic)); - kfree(ret); - return ERR_PTR(-ENOMEM); - } + if (cnt > 0) { + ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC); + if (!ret->res) { + usnic_err("Failed to allocate resources for %s. Out of memory\n", + usnic_vnic_pci_name(vnic)); + kfree(ret); + return ERR_PTR(-ENOMEM); + } - spin_lock(&vnic->res_lock); - src = &vnic->chunks[type]; - for (i = 0; i < src->cnt && ret->cnt < cnt; i++) { - res = src->res[i]; - if (!res->owner) { - src->free_cnt--; - res->owner = owner; - ret->res[ret->cnt++] = res; + spin_lock(&vnic->res_lock); + src = &vnic->chunks[type]; + for (i = 0; i < src->cnt && ret->cnt < cnt; i++) { + res = src->res[i]; + if (!res->owner) { + src->free_cnt--; + res->owner = owner; + ret->res[ret->cnt++] = res; + } } - } - spin_unlock(&vnic->res_lock); + spin_unlock(&vnic->res_lock); + } ret->type = type; ret->vnic = vnic; WARN_ON(ret->cnt != cnt); @@ -281,14 +283,16 @@ void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk) int i; struct usnic_vnic *vnic = chunk->vnic; - spin_lock(&vnic->res_lock); - while ((i = --chunk->cnt) >= 0) { - res = chunk->res[i]; - chunk->res[i] = NULL; - res->owner = NULL; - vnic->chunks[res->type].free_cnt++; + if (chunk->cnt > 0) { + spin_lock(&vnic->res_lock); + while ((i = --chunk->cnt) >= 0) { + res = chunk->res[i]; + chunk->res[i] = NULL; + res->owner = NULL; + vnic->chunks[res->type].free_cnt++; + } + spin_unlock(&vnic->res_lock); } - spin_unlock(&vnic->res_lock); kfree(chunk->res); kfree(chunk); |