From a6054df3c1c2092e1d2cdceb6e81a7e54d6bd361 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 5 Feb 2016 11:43:28 +0530 Subject: iw_cxgb4: make queue allocation code more readable Rename local mm* variables to more meaningful names Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e99345e..dadf5f1 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1621,7 +1621,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; - struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; + struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; + struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL; PDBG("%s ib_pd %p\n", __func__, pd); @@ -1706,29 +1707,30 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, goto err2; if (udata) { - mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); - if (!mm1) { + sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL); + if (!sq_key_mm) { ret = -ENOMEM; goto err3; } - mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); - if (!mm2) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { ret = -ENOMEM; goto err4; } - mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); - if (!mm3) { + sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); + if (!sq_db_key_mm) { ret = -ENOMEM; goto err5; } - mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); - if (!mm4) { + rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { ret = -ENOMEM; goto err6; } if (t4_sq_onchip(&qhp->wq.sq)) { - mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); - if (!mm5) { + ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), + GFP_KERNEL); + if (!ma_sync_key_mm) { ret = -ENOMEM; goto err7; } @@ -1743,7 +1745,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, uresp.rq_size = qhp->wq.rq.size; uresp.rq_memsize = qhp->wq.rq.memsize; spin_lock(&ucontext->mmap_lock); - if (mm5) { + if (ma_sync_key_mm) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; } else { @@ -1761,28 +1763,29 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) goto err8; - mm1->key = uresp.sq_key; - mm1->addr = qhp->wq.sq.phys_addr; - mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); - insert_mmap(ucontext, mm1); - mm2->key = uresp.rq_key; - mm2->addr = virt_to_phys(qhp->wq.rq.queue); - mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); - insert_mmap(ucontext, mm2); - mm3->key = uresp.sq_db_gts_key; - mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa; - mm3->len = PAGE_SIZE; - insert_mmap(ucontext, mm3); - mm4->key = uresp.rq_db_gts_key; - mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa; - mm4->len = PAGE_SIZE; - insert_mmap(ucontext, mm4); - if (mm5) { - mm5->key = uresp.ma_sync_key; - mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) - + PCIE_MA_SYNC_A) & PAGE_MASK; - mm5->len = PAGE_SIZE; - insert_mmap(ucontext, mm5); + sq_key_mm->key = uresp.sq_key; + sq_key_mm->addr = qhp->wq.sq.phys_addr; + sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); + insert_mmap(ucontext, sq_key_mm); + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, rq_key_mm); + sq_db_key_mm->key = uresp.sq_db_gts_key; + 
sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; + sq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, sq_db_key_mm); + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, rq_db_key_mm); + if (ma_sync_key_mm) { + ma_sync_key_mm->key = uresp.ma_sync_key; + ma_sync_key_mm->addr = + (pci_resource_start(rhp->rdev.lldi.pdev, 0) + + PCIE_MA_SYNC_A) & PAGE_MASK; + ma_sync_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, ma_sync_key_mm); } } qhp->ibqp.qp_num = qhp->wq.sq.qid; @@ -1795,15 +1798,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->wq.rq.memsize, attrs->cap.max_recv_wr); return &qhp->ibqp; err8: - kfree(mm5); + kfree(ma_sync_key_mm); err7: - kfree(mm4); + kfree(rq_db_key_mm); err6: - kfree(mm3); + kfree(sq_db_key_mm); err5: - kfree(mm2); + kfree(rq_key_mm); err4: - kfree(mm1); + kfree(sq_key_mm); err3: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err2: -- cgit v0.10.2 From 6812faefb754135d88ace013ad3b75430a8ba413 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 5 Feb 2016 11:43:29 +0530 Subject: iw_cxgb4: remove false error log entry Don't log errors if a listening endpoint is going away when procesing a PASS_ACCEPT_REQ message. This can happen. Change the error printk to a PDBG() debug log entry Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index cd2ff5f..0c2111b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2399,8 +2399,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) } if (state_read(&parent_ep->com) != LISTEN) { - printk(KERN_ERR "%s - listening ep not in LISTEN\n", - __func__); + PDBG("%s - listening ep not in LISTEN\n", __func__); goto reject; } -- cgit v0.10.2 From ac8e4c69a02103d17247a3fae8daa10ae3497c2d Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 5 Feb 2016 11:43:30 +0530 Subject: cxgb4/iw_cxgb4: TOS support This series provides support for iWARP applications to specify a TOS value and have that map to a VLAN Priority for iw_cxgb4 iWARP connections. In iw_cxgb4, when allocating an L2T entry, pass the skb_priority based on the tos value in the cm_id. Also pass the correct tos value during connection setup so the passive side gets the client's desired tos. When sending the FLOWC work request to FW, if the egress device is in a vlan, then use the vlan priority bits as the scheduling class. This allows associating RDMA connections with scheduling classes to provide traffic shaping per flow. 
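As an illustration of how the feature is consumed (not part of this patch): an application or ULP requests the per-connection TOS through the RDMA CM, and that value ends up in the provider's cm_id->tos, which the changes below translate into the DSCP bits (tos >> 2), the route lookup, and the L2T/VLAN priority. A minimal user-space sketch using the standard librdmacm call; the helper name is illustrative:

    #include <stdint.h>
    #include <rdma/rdma_cma.h>

    /* Sketch only: ask the RDMA CM for a per-connection TOS before
     * rdma_connect(). iw_cxgb4 then maps cm_id->tos to DSCP and to a
     * VLAN priority when allocating the L2T entry. */
    static int request_connection_tos(struct rdma_cm_id *id, uint8_t tos)
    {
            return rdma_set_option(id, RDMA_OPTION_ID, RDMA_OPTION_ID_TOS,
                                   &tos, sizeof(tos));
    }
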
Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0c2111b..8cd7b5e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -485,12 +485,19 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) unsigned int flowclen = 80; struct fw_flowc_wr *flowc; int i; + u16 vlan = ep->l2t->vlan; + int nparams; + + if (vlan == CPL_L2T_VLAN_NONE) + nparams = 8; + else + nparams = 9; skb = get_skb(skb, flowclen, GFP_KERNEL); flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | - FW_FLOWC_WR_NPARAMS_V(8)); + FW_FLOWC_WR_NPARAMS_V(nparams)); flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, 16)) | FW_WR_FLOWID_V(ep->hwtid)); @@ -511,9 +518,17 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); - /* Pad WR to 16 byte boundary */ - flowc->mnemval[8].mnemonic = 0; - flowc->mnemval[8].val = 0; + if (nparams == 9) { + u16 pri; + + pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[8].val = cpu_to_be32(pri); + } else { + /* Pad WR to 16 byte boundary */ + flowc->mnemval[8].mnemonic = 0; + flowc->mnemval[8].val = 0; + } for (i = 0; i < 9; i++) { flowc->mnemval[i].r4[0] = 0; flowc->mnemval[i].r4[1] = 0; @@ -710,7 +725,7 @@ static int send_connect(struct c4iw_ep *ep) L2T_IDX_V(ep->l2t->idx) | TX_CHAN_V(ep->tx_chan) | SMAC_SEL_V(ep->smac_idx) | - DSCP_V(ep->tos) | + DSCP_V(ep->tos >> 2) | ULP_MODE_V(ULP_MODE_TCPDDP) | RCV_BUFSIZ_V(win); opt2 = RX_CHANNEL_V(0) | @@ -1864,7 +1879,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) L2T_IDX_V(ep->l2t->idx) | TX_CHAN_V(ep->tx_chan) | SMAC_SEL_V(ep->smac_idx) | - DSCP_V(ep->tos) | + DSCP_V(ep->tos >> 2) | ULP_MODE_V(ULP_MODE_TCPDDP) | RCV_BUFSIZ_V(win)); req->tcb.opt2 = (__force __be32) (PACE_V(1) | @@ -1928,7 +1943,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, struct dst_entry *dst, struct c4iw_dev *cdev, - bool clear_mpa_v1, enum chip_type adapter_type) + bool clear_mpa_v1, enum chip_type adapter_type, u8 tos) { struct neighbour *n; int err, step; @@ -1958,7 +1973,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, - n, pdev, 0); + n, pdev, rt_tos2priority(tos)); if (!ep->l2t) goto out; ep->mtu = pdev->mtu; @@ -2041,7 +2056,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) if (ep->com.cm_id->local_addr.ss_family == AF_INET) { ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, 0); + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { @@ -2058,7 +2073,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) goto fail3; } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, - ep->com.dev->rdev.lldi.adapter_type); + ep->com.dev->rdev.lldi.adapter_type, + ep->com.cm_id->tos); if (err) { pr_err("%s - cannot alloc l2e.\n", __func__); goto fail4; @@ -2069,7 +2085,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) ep->l2t->idx); state_set(&ep->com, CONNECTING); - ep->tos = 0; + ep->tos = 
ep->com.cm_id->tos; /* send connect request to rnic */ err = send_connect(ep); @@ -2391,6 +2407,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) u16 peer_mss = ntohs(req->tcpopt.mss); int iptype; unsigned short hdrs; + u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); parent_ep = lookup_stid(t, stid); if (!parent_ep) { @@ -2414,7 +2431,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) ntohs(peer_port), peer_mss); dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid))); + tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, @@ -2440,7 +2457,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) } err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, - parent_ep->com.dev->rdev.lldi.adapter_type); + parent_ep->com.dev->rdev.lldi.adapter_type, tos); if (err) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -2508,7 +2525,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; - child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); + child_ep->tos = tos; child_ep->dst = dst; child_ep->hwtid = hwtid; @@ -3202,7 +3219,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ra, ntohs(raddr->sin_port)); ep->dst = find_route(dev, laddr->sin_addr.s_addr, raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, 0); + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -3233,7 +3250,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, - ep->com.dev->rdev.lldi.adapter_type); + ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); goto fail3; @@ -3244,7 +3261,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->l2t->idx); state_set(&ep->com, CONNECTING); - ep->tos = 0; + ep->tos = cm_id->tos; /* send connect request to rnic */ err = send_connect(ep); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index a072d34..1d2d1da 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -1021,6 +1021,8 @@ struct cpl_l2t_write_req { #define L2T_W_NOREPLY_V(x) ((x) << L2T_W_NOREPLY_S) #define L2T_W_NOREPLY_F L2T_W_NOREPLY_V(1U) +#define CPL_L2T_VLAN_NONE 0xfff + struct cpl_l2t_write_rpl { union opcode_tid ot; u8 status; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a32de30..c8661c7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -561,6 +561,7 @@ enum fw_flowc_mnem { FW_FLOWC_MNEM_SNDBUF, FW_FLOWC_MNEM_MSS, FW_FLOWC_MNEM_TXDATAPLEN_MAX, + FW_FLOWC_MNEM_SCHEDCLASS = 11, }; struct fw_flowc_mnemval { -- cgit v0.10.2 From ee30f7d507c0f3b3499bbe84d14849a6b5ac9484 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 12 Feb 2016 16:10:35 +0530 Subject: iw_cxgb4: Max fastreg depth depends on DSGL support The max depth of a fastreg mr depends on whether the device supports DSGL or not. So compute it dynamically based on the device support and the module use_dsgl option. 
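For context (not part of the patch): because max_fast_reg_page_list_len now already folds in DSGL support and the use_dsgl module option, a ULP only needs to clamp its desired fast-reg depth to the value the device reports. A minimal sketch, with pick_fr_depth() being an illustrative helper name:

    #include <linux/kernel.h>
    #include <rdma/ib_verbs.h>

    /* Sketch: size a fast-registration MR against the device-reported
     * limit, which reflects whether DSGL write commands are usable. */
    static u32 pick_fr_depth(const struct ib_device_attr *attrs, u32 wanted)
    {
            return min_t(u32, wanted, attrs->max_fast_reg_page_list_len);
    }
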
Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 7849890..9274c90 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -617,12 +617,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, int ret = 0; int length = roundup(max_num_sg * sizeof(u64), 32); + php = to_c4iw_pd(pd); + rhp = php->rhp; + if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > t4_max_fr_depth(use_dsgl)) + max_num_sg > t4_max_fr_depth(&rhp->rdev.lldi.ulptx_memwrite_dsgl && + use_dsgl)) return ERR_PTR(-EINVAL); - php = to_c4iw_pd(pd); - rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index ec04272..8669f48 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -339,7 +339,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->max_mr = c4iw_num_stags(&dev->rdev); props->max_pd = T4_MAX_NUM_PD; props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl); + props->max_fast_reg_page_list_len = + t4_max_fr_depth(dev->rdev.lldi.ulptx_memwrite_dsgl && use_dsgl); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index dadf5f1..d729313 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -606,7 +606,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, } static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, u8 *len16, u8 t5dev) + struct ib_reg_wr *wr, u8 *len16, bool dsgl_supported) { struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); struct fw_ri_immd *imdp; @@ -615,7 +615,7 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); int rem; - if (mhp->mpl_len > t4_max_fr_depth(use_dsgl)) + if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl)) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; @@ -629,7 +629,7 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); - if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { + if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) { struct fw_ri_dsgl *sglp; for (i = 0; i < mhp->mpl_len; i++) @@ -808,9 +808,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, - is_t5( - qhp->rhp->rdev.lldi.adapter_type) ? 
- 1 : 0); + qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) -- cgit v0.10.2 From 3021376d6d12dd1be8a0a13c16dae8badb7766fd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 1 Feb 2016 17:42:02 +0100 Subject: infiniband: cxgb4: use %pR format string for printing resources The cxgb4 prints an MMIO resource using the "0x%x" and "%p" format strings on the length and start, respective, but that triggers a compiler warning when using a 64-bit resource_size_t on a 32-bit architecture: drivers/infiniband/hw/cxgb4/device.c: In function 'c4iw_rdev_open': drivers/infiniband/hw/cxgb4/device.c:807:7: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (void *)pci_resource_start(rdev->lldi.pdev, 2), This changes the format string to use %pR instead, which pretty-prints the resource, avoids the warning and is shorter. Signed-off-by: Arnd Bergmann Acked-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 8024ea4..ebd60a2 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -801,10 +801,9 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, rdev->lldi.vr->cq.size); - PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p " + PDBG("udb %pR db_reg %p gts_reg %p " "qpmask 0x%x cqmask 0x%x\n", - (unsigned)pci_resource_len(rdev->lldi.pdev, 2), - (void *)pci_resource_start(rdev->lldi.pdev, 2), + &rdev->lldi.pdev->resource[2], rdev->lldi.db_reg, rdev->lldi.gts_reg, rdev->qpmask, rdev->cqmask); -- cgit v0.10.2 From 765d67748bcf802c4642a49cd0139787d0d80783 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 17 Feb 2016 08:15:41 -0800 Subject: IB: new common API for draining queues Add provider-specific drain_sq/drain_rq functions for providers needing special drain logic. Add static functions __ib_drain_sq() and __ib_drain_rq() which post noop WRs to the SQ or RQ and block until their completions are processed. This ensures the applications completions for work requests posted prior to the drain work request have all been processed. Add API functions ib_drain_sq(), ib_drain_rq(), and ib_drain_qp(). For the drain logic to work, the caller must: ensure there is room in the CQ(s) and QP for the drain work request and completion. allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be IB_POLL_DIRECT. ensure that there are no other contexts that are posting WRs concurrently. Otherwise the drain is not guaranteed. Reviewed-by: Chuck Lever Signed-off-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5af6d02..48dc43c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1657,3 +1657,167 @@ next_page: return i; } EXPORT_SYMBOL(ib_sg_to_pages); + +struct ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* + * Post a WR and block until its completion is reaped for the SQ. 
+ */ +static void __ib_drain_sq(struct ib_qp *qp) +{ + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct ib_drain_cqe sdrain; + struct ib_send_wr swr = {}, *bad_swr; + int ret; + + if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) { + WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT, + "IB_POLL_DIRECT poll_ctx not supported for drain\n"); + return; + } + + swr.wr_cqe = &sdrain.cqe; + sdrain.cqe.done = ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + ret = ib_post_send(qp, &swr, &bad_swr); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + wait_for_completion(&sdrain.done); +} + +/* + * Post a WR and block until its completion is reaped for the RQ. + */ +static void __ib_drain_rq(struct ib_qp *qp) +{ + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}, *bad_rwr; + int ret; + + if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) { + WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT, + "IB_POLL_DIRECT poll_ctx not supported for drain\n"); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + ret = ib_post_recv(qp, &rwr, &bad_rwr); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + wait_for_completion(&rdrain.done); +} + +/** + * ib_drain_sq() - Block until all SQ CQEs have been consumed by the + * application. + * @qp: queue pair to drain + * + * If the device has a provider-specific drain function, then + * call that. Otherwise call the generic drain function + * __ib_drain_sq(). + * + * The caller must: + * + * ensure there is room in the CQ and SQ for the drain work request and + * completion. + * + * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be + * IB_POLL_DIRECT. + * + * ensure that there are no other contexts that are posting WRs concurrently. + * Otherwise the drain is not guaranteed. + */ +void ib_drain_sq(struct ib_qp *qp) +{ + if (qp->device->drain_sq) + qp->device->drain_sq(qp); + else + __ib_drain_sq(qp); +} +EXPORT_SYMBOL(ib_drain_sq); + +/** + * ib_drain_rq() - Block until all RQ CQEs have been consumed by the + * application. + * @qp: queue pair to drain + * + * If the device has a provider-specific drain function, then + * call that. Otherwise call the generic drain function + * __ib_drain_rq(). + * + * The caller must: + * + * ensure there is room in the CQ and RQ for the drain work request and + * completion. + * + * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be + * IB_POLL_DIRECT. + * + * ensure that there are no other contexts that are posting WRs concurrently. + * Otherwise the drain is not guaranteed. + */ +void ib_drain_rq(struct ib_qp *qp) +{ + if (qp->device->drain_rq) + qp->device->drain_rq(qp); + else + __ib_drain_rq(qp); +} +EXPORT_SYMBOL(ib_drain_rq); + +/** + * ib_drain_qp() - Block until all CQEs have been consumed by the + * application on both the RQ and SQ. + * @qp: queue pair to drain + * + * The caller must: + * + * ensure there is room in the CQ(s), SQ, and RQ for drain work requests + * and completions. + * + * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be + * IB_POLL_DIRECT. 
+ * + * ensure that there are no other contexts that are posting WRs concurrently. + * Otherwise the drain is not guaranteed. + */ +void ib_drain_qp(struct ib_qp *qp) +{ + ib_drain_sq(qp); + ib_drain_rq(qp); +} +EXPORT_SYMBOL(ib_drain_qp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c..68b7e97 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1846,6 +1846,8 @@ struct ib_device { int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); + void (*drain_rq)(struct ib_qp *qp); + void (*drain_sq)(struct ib_qp *qp); struct ib_dma_mapping_ops *dma_ops; @@ -3094,4 +3096,7 @@ int ib_sg_to_pages(struct ib_mr *mr, int sg_nents, int (*set_page)(struct ib_mr *, u64)); +void ib_drain_rq(struct ib_qp *qp); +void ib_drain_sq(struct ib_qp *qp); +void ib_drain_qp(struct ib_qp *qp); #endif /* IB_VERBS_H */ -- cgit v0.10.2 From 086dc6e359d11fd29d0f2041cdc0bb76a5d807d8 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 17 Feb 2016 08:15:42 -0800 Subject: iw_cxgb4: add queue drain functions Add completion objects, named sq_drained and rq_drained, to the c4iw_qp struct. The queue-specific completion object is signaled when the last CQE is drained from the CQ for that queue. Add c4iw_drain_sq() to block until qp->rq_drained is completed. Add c4iw_drain_rq() to block until qp->sq_drained is completed. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index cf21df4..b4eeb78 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -815,8 +815,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) } } out: - if (wq) + if (wq) { + if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) { + if (t4_sq_empty(wq)) + complete(&qhp->sq_drained); + if (t4_rq_empty(wq)) + complete(&qhp->rq_drained); + } spin_unlock(&qhp->lock); + } return ret; } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index fb2de75..7c6a6e1 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -476,6 +476,8 @@ struct c4iw_qp { wait_queue_head_t wait; struct timer_list timer; int sq_sig_all; + struct completion rq_drained; + struct completion sq_drained; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -1016,6 +1018,8 @@ extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; +void c4iw_drain_rq(struct ib_qp *qp); +void c4iw_drain_sq(struct ib_qp *qp); #endif diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index ec04272..104662d 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -564,6 +564,8 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.get_protocol_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; + dev->ibdev.drain_sq = c4iw_drain_sq; + dev->ibdev.drain_rq = c4iw_drain_rq; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e99345e..7b1b1e8 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1697,6 +1697,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct 
ib_qp_init_attr *attrs, qhp->attr.max_ird = 0; qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); + init_completion(&qhp->sq_drained); + init_completion(&qhp->rq_drained); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); @@ -1888,3 +1890,17 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; } + +void c4iw_drain_sq(struct ib_qp *ibqp) +{ + struct c4iw_qp *qp = to_c4iw_qp(ibqp); + + wait_for_completion(&qp->sq_drained); +} + +void c4iw_drain_rq(struct ib_qp *ibqp) +{ + struct c4iw_qp *qp = to_c4iw_qp(ibqp); + + wait_for_completion(&qp->rq_drained); +} -- cgit v0.10.2 From 561392d42d42c0fefad179a07b6dd1e6e261a572 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 17 Feb 2016 08:15:42 -0800 Subject: IB/srp: Use ib_drain_rq() Signed-off-by: Steve Wise Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 03022f6..b6bf204 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -446,49 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) dev->max_pages_per_mr); } -static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct srp_rdma_ch *ch = cq->cq_context; - - complete(&ch->done); -} - -static struct ib_cqe srp_drain_cqe = { - .done = srp_drain_done, -}; - /** * srp_destroy_qp() - destroy an RDMA queue pair * @ch: SRP RDMA channel. * - * Change a queue pair into the error state and wait until all receive - * completions have been processed before destroying it. This avoids that - * the receive completion handler can access the queue pair while it is + * Drain the qp before destroying it. This avoids that the receive + * completion handler can access the queue pair while it is * being destroyed. 
*/ static void srp_destroy_qp(struct srp_rdma_ch *ch) { - static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; - static struct ib_recv_wr wr = { 0 }; - struct ib_recv_wr *bad_wr; - int ret; - - wr.wr_cqe = &srp_drain_cqe; - /* Destroying a QP and reusing ch->done is only safe if not connected */ - WARN_ON_ONCE(ch->connected); - - ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE); - WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret); - if (ret) - goto out; - - init_completion(&ch->done); - ret = ib_post_recv(ch->qp, &wr, &bad_wr); - WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret); - if (ret == 0) - wait_for_completion(&ch->done); - -out: + ib_drain_rq(ch->qp); ib_destroy_qp(ch->qp); } @@ -508,7 +476,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) if (!init_attr) return -ENOMEM; - /* queue_size + 1 for ib_drain_qp */ + /* queue_size + 1 for ib_drain_rq() */ recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, ch->comp_vector, IB_POLL_SOFTIRQ); if (IS_ERR(recv_cq)) { -- cgit v0.10.2 From 4c8ba94d1763e4c46564e59729cc0553ba1fddbb Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 17 Feb 2016 08:17:12 -0800 Subject: IB/iser: Use ib_drain_sq() Signed-off-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 95f0a64..0351059 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -458,9 +458,6 @@ struct iser_fr_pool { * @comp: iser completion context * @fr_pool: connection fast registration poool * @pi_support: Indicate device T10-PI support - * @last: last send wr to signal all flush errors were drained - * @last_cqe: cqe handler for last wr - * @last_comp: completes when all connection completions consumed */ struct ib_conn { struct rdma_cm_id *cma_id; @@ -472,10 +469,7 @@ struct ib_conn { struct iser_comp *comp; struct iser_fr_pool fr_pool; bool pi_support; - struct ib_send_wr last; - struct ib_cqe last_cqe; struct ib_cqe reg_cqe; - struct completion last_comp; }; /** @@ -617,7 +611,6 @@ void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc); -void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_task_rdma_init(struct iscsi_iser_task *task); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index ed54b38..81ae2e3 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -729,13 +729,6 @@ void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc) kmem_cache_free(ig.desc_cache, desc); } -void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc) -{ - struct ib_conn *ib_conn = wc->qp->qp_context; - - complete(&ib_conn->last_comp); -} - void iser_task_rdma_init(struct iscsi_iser_task *iser_task) { diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 40c0f49..47e1159 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -663,7 +663,6 @@ void iser_conn_release(struct iser_conn *iser_conn) int iser_conn_terminate(struct iser_conn *iser_conn) { struct ib_conn *ib_conn = &iser_conn->ib_conn; - struct ib_send_wr *bad_wr; int err = 0; /* terminate the iser conn only if the conn state is UP */ @@ -688,14 +687,8 @@ int iser_conn_terminate(struct 
iser_conn *iser_conn) iser_err("Failed to disconnect, conn: 0x%p err %d\n", iser_conn, err); - /* post an indication that all flush errors were consumed */ - err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr); - if (err) { - iser_err("conn %p failed to post last wr", ib_conn); - return 1; - } - - wait_for_completion(&ib_conn->last_comp); + /* block until all flush errors are consumed */ + ib_drain_sq(ib_conn->qp); } return 1; @@ -954,10 +947,6 @@ void iser_conn_init(struct iser_conn *iser_conn) ib_conn->post_recv_buf_count = 0; ib_conn->reg_cqe.done = iser_reg_comp; - ib_conn->last_cqe.done = iser_last_comp; - ib_conn->last.wr_cqe = &ib_conn->last_cqe; - ib_conn->last.opcode = IB_WR_SEND; - init_completion(&ib_conn->last_comp); } /** -- cgit v0.10.2 From f727a0c324ce2c7e7cbe478d22895bf7bc8ed0a6 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:46 -0500 Subject: staging/hfi1: Add function stubs for TID caching Add mmu notify helper functions and TID caching function stubs in preparation for the TID caching implementation. TID caching makes use of the MMU notifier to allow the driver to respond to the user freeing memory which is allocated to the HFI. This patch implements the basic MMU notifier functions to insert, find and remove buffer pages from memory based on the mmu_notifier being invoked. In addition it places stubs in place for the main entry points by follow on code. Follow up patches will complete the implementation of the interaction with user space and makes use of these functions. Signed-off-by: Mitko Haralanov Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/staging/rdma/hfi1/Kconfig index fd25078..bd0249b 100644 --- a/drivers/staging/rdma/hfi1/Kconfig +++ b/drivers/staging/rdma/hfi1/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_HFI1 tristate "Intel OPA Gen1 support" depends on X86_64 + select MMU_NOTIFIER default m ---help--- This is a low-level driver for Intel OPA Gen1 adapter. diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 68c5a31..e63251b 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := chip.o cq.o device.o diag.o dma.o driver.o efivar.o eprom.o file_ops.o firmware.o \ init.o intr.o keys.o mad.o mmap.o mr.o pcie.o pio.o pio_copy.o \ qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \ - uc.o ud.o user_pages.o user_sdma.o verbs_mcast.o verbs.o + uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs_mcast.o verbs.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o CFLAGS_trace.o = -I$(src) diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 2611bb2..ddb21f0 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -65,6 +65,8 @@ #include #include #include +#include +#include #include "chip_registers.h" #include "common.h" @@ -1125,6 +1127,8 @@ struct hfi1_devdata { #define PT_EAGER 1 #define PT_INVALID 2 +struct mmu_rb_node; + /* Private data for file operations */ struct hfi1_filedata { struct hfi1_ctxtdata *uctxt; diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c new file mode 100644 index 0000000..bafeddf --- /dev/null +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -0,0 +1,264 @@ +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. 
+ * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#include + +#include "user_exp_rcv.h" +#include "trace.h" + +struct mmu_rb_node { + struct rb_node rbnode; + unsigned long virt; + unsigned long phys; + unsigned long len; + struct tid_group *grp; + u32 rcventry; + dma_addr_t dma_addr; + bool freed; + unsigned npages; + struct page *pages[0]; +}; + +enum mmu_call_types { + MMU_INVALIDATE_PAGE = 0, + MMU_INVALIDATE_RANGE = 1 +}; + +static const char * const mmu_types[] = { + "PAGE", + "RANGE" +}; + +static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, + unsigned long); +static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *, + unsigned long) __maybe_unused; +static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *, + u32); +static int mmu_rb_insert_by_addr(struct rb_root *, + struct mmu_rb_node *) __maybe_unused; +static int mmu_rb_insert_by_entry(struct rb_root *, + struct mmu_rb_node *) __maybe_unused; +static void mmu_notifier_mem_invalidate(struct mmu_notifier *, + unsigned long, unsigned long, + enum mmu_call_types); +static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, + unsigned long); +static inline void mmu_notifier_range_start(struct mmu_notifier *, + struct mm_struct *, + unsigned long, unsigned long); + +static struct mmu_notifier_ops __maybe_unused mn_opts = { + .invalidate_page = mmu_notifier_page, + .invalidate_range_start = mmu_notifier_range_start, +}; + +/* + * Initialize context and file private data needed for Expected + * receive caching. This needs to be done after the context has + * been configured with the eager/expected RcvEntry counts. + */ +int hfi1_user_exp_rcv_init(struct file *fp) +{ + return -EINVAL; +} + +int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) +{ + return -EINVAL; +} + +int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) +{ + return -EINVAL; +} + +int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) +{ + return -EINVAL; +} + +int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) +{ + return -EINVAL; +} + +static inline void mmu_notifier_page(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long addr) +{ + mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE, + MMU_INVALIDATE_PAGE); +} + +static inline void mmu_notifier_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + mmu_notifier_mem_invalidate(mn, start, end, MMU_INVALIDATE_RANGE); +} + +static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, + unsigned long start, unsigned long end, + enum mmu_call_types type) +{ + /* Stub for now */ +} + +static inline int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr, + unsigned long len) +{ + if ((addr + len) <= node->virt) + return -1; + else if (addr >= node->virt && addr < (node->virt + node->len)) + return 0; + else + return 1; +} + +static inline int mmu_entry_cmp(struct mmu_rb_node *node, u32 entry) +{ + if (entry < node->rcventry) + return -1; + else if (entry > node->rcventry) + return 1; + else + return 0; +} + +static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *root, + unsigned long addr) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct mmu_rb_node *mnode = + container_of(node, struct mmu_rb_node, rbnode); + /* + * When searching, use at least one page length for size. The + * MMU notifier will not give us anything less than that. 
We + * also don't need anything more than a page because we are + * guaranteed to have non-overlapping buffers in the tree. + */ + int result = mmu_addr_cmp(mnode, addr, PAGE_SIZE); + + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return mnode; + } + return NULL; +} + +static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *root, + u32 index) +{ + struct mmu_rb_node *rbnode; + struct rb_node *node; + + if (root && !RB_EMPTY_ROOT(root)) + for (node = rb_first(root); node; node = rb_next(node)) { + rbnode = rb_entry(node, struct mmu_rb_node, rbnode); + if (rbnode->rcventry == index) + return rbnode; + } + return NULL; +} + +static int mmu_rb_insert_by_entry(struct rb_root *root, + struct mmu_rb_node *node) +{ + struct rb_node **new = &root->rb_node, *parent = NULL; + + while (*new) { + struct mmu_rb_node *this = + container_of(*new, struct mmu_rb_node, rbnode); + int result = mmu_entry_cmp(this, node->rcventry); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return 1; + } + + rb_link_node(&node->rbnode, parent, new); + rb_insert_color(&node->rbnode, root); + return 0; +} + +static int mmu_rb_insert_by_addr(struct rb_root *root, struct mmu_rb_node *node) +{ + struct rb_node **new = &root->rb_node, *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct mmu_rb_node *this = + container_of(*new, struct mmu_rb_node, rbnode); + int result = mmu_addr_cmp(this, node->virt, node->len); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return 1; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&node->rbnode, parent, new); + rb_insert_color(&node->rbnode, root); + + return 0; +} diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/staging/rdma/hfi1/user_exp_rcv.h index 4f4876e..28ef98a 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.h +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.h @@ -50,6 +50,8 @@ * */ +#include "hfi.h" + #define EXP_TID_TIDLEN_MASK 0x7FFULL #define EXP_TID_TIDLEN_SHIFT 0 #define EXP_TID_TIDCTRL_MASK 0x3ULL @@ -71,4 +73,10 @@ (tid) |= EXP_TID_SET(field, (value)); \ } while (0) +int hfi1_user_exp_rcv_init(struct file *); +int hfi1_user_exp_rcv_free(struct hfi1_filedata *); +int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *); +int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *); +int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *); + #endif /* _HFI1_USER_EXP_RCV_H */ -- cgit v0.10.2 From 462075a6ea85aa1cf6ee1620a232c483dfd4b520 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:47 -0500 Subject: uapi/hfi1_user: Correct comment for capability bit The HFI1_CAP_TID_UNMAP comment was incorrectly implying the opposite of what capability actually did. Correct this error. 
Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 288694e..cf17271 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -93,7 +93,7 @@ #define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/ #define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */ #define HFI1_CAP_NODROP_EGR_FULL (1UL << 9) /* Don't drop on EGR buffs full */ -#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Enable Expected TID caching */ +#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Disable Expected TID caching */ #define HFI1_CAP_PRINT_UNIMPL (1UL << 11) /* Show for unimplemented feats */ #define HFI1_CAP_ALLOW_PERM_JKEY (1UL << 12) /* Allow use of permissive JKEY */ #define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */ -- cgit v0.10.2 From 955ad36dcde4639664253c2bd39f626cd88d2acf Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:48 -0500 Subject: uapi/hfi1_user: Add command and event for TID caching TID caching will use a new event to signal userland that cache invalidation has occurred and needs a matching command code that will be used to read the invalidated TIDs. Add the event bit and the new command to the exported header file. The command is also added to the switch() statement in file_ops.c for completeness and in preparation for its usage later. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index d57d549..c666935 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -241,6 +241,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, must_be_root = 1; /* validate user */ copy = 0; break; + case HFI1_CMD_TID_INVAL_READ: default: ret = -EINVAL; goto bail; diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index cf17271..92be2e37 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -134,6 +134,7 @@ #define HFI1_CMD_ACK_EVENT 10 /* ack & clear user status bits */ #define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ #define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ +#define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */ /* separate EPROM commands from normal PSM commands */ #define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */ #define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */ @@ -147,13 +148,15 @@ #define _HFI1_EVENT_LID_CHANGE_BIT 2 #define _HFI1_EVENT_LMC_CHANGE_BIT 3 #define _HFI1_EVENT_SL2VL_CHANGE_BIT 4 -#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_SL2VL_CHANGE_BIT +#define _HFI1_EVENT_TID_MMU_NOTIFY_BIT 5 +#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_TID_MMU_NOTIFY_BIT #define HFI1_EVENT_FROZEN (1UL << _HFI1_EVENT_FROZEN_BIT) #define HFI1_EVENT_LINKDOWN (1UL << _HFI1_EVENT_LINKDOWN_BIT) #define HFI1_EVENT_LID_CHANGE (1UL << _HFI1_EVENT_LID_CHANGE_BIT) #define HFI1_EVENT_LMC_CHANGE (1UL << _HFI1_EVENT_LMC_CHANGE_BIT) #define HFI1_EVENT_SL2VL_CHANGE (1UL << _HFI1_EVENT_SL2VL_CHANGE_BIT) +#define HFI1_EVENT_TID_MMU_NOTIFY (1UL << _HFI1_EVENT_TID_MMU_NOTIFY_BIT) /* * These are the status bits readable (in ASCII form, 64bit value) -- cgit v0.10.2 From a86cd357e5be1b7eae3b399c02b972a92808c38a Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:49 -0500 Subject: staging/hfi1: Add 
definitions needed for TID cache In preparation for adding the TID caching support, there is a set of headers, structures, and variables which will be needed. This commit adds them to the hfi.h header file. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index ddb21f0..51ecf45 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -179,6 +179,11 @@ struct ctxt_eager_bufs { } *rcvtids; }; +struct exp_tid_set { + struct list_head list; + u32 count; +}; + struct hfi1_ctxtdata { /* shadow the ctxt's RcvCtrl register */ u64 rcvctrl; @@ -247,6 +252,11 @@ struct hfi1_ctxtdata { struct page **tid_pg_list; /* dma handles for exp tid pages */ dma_addr_t *physshadow; + + struct exp_tid_set tid_group_list; + struct exp_tid_set tid_used_list; + struct exp_tid_set tid_full_list; + /* lock protecting all Expected TID data */ spinlock_t exp_lock; /* number of pio bufs for this ctxt (all procs, if shared) */ @@ -1137,6 +1147,16 @@ struct hfi1_filedata { struct hfi1_user_sdma_pkt_q *pq; /* for cpu affinity; -1 if none */ int rec_cpu_num; + struct mmu_notifier mn; + struct rb_root tid_rb_root; + spinlock_t tid_lock; /* protect tid_[limit,used] counters */ + u32 tid_limit; + u32 tid_used; + spinlock_t rb_lock; /* protect tid_rb_root RB tree */ + u32 *invalid_tids; + u32 invalid_tid_idx; + spinlock_t invalid_lock; /* protect the invalid_tids array */ + int (*mmu_rb_insert)(struct rb_root *, struct mmu_rb_node *); }; extern struct list_head hfi1_dev_list; -- cgit v0.10.2 From acac10fdd75a85b10a638381127f7bbed632580d Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:50 -0500 Subject: staging/hfi1: Remove un-needed variable There is no need to use a separate variable for a return value and a label when returning right away would do just as well. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index c666935..76fe603 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1037,22 +1037,19 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo) { - int ret = 0; unsigned num_subctxts; num_subctxts = uinfo->subctxt_cnt; - if (num_subctxts > HFI1_MAX_SHARED_CTXTS) { - ret = -EINVAL; - goto bail; - } + if (num_subctxts > HFI1_MAX_SHARED_CTXTS) + return -EINVAL; uctxt->subctxt_cnt = uinfo->subctxt_cnt; uctxt->subctxt_id = uinfo->subctxt_id; uctxt->active_slaves = 1; uctxt->redirect_seq_cnt = 1; set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); -bail: - return ret; + + return 0; } static int setup_subctxt(struct hfi1_ctxtdata *uctxt) -- cgit v0.10.2 From b8abe346737215c6ee6b50c01771b4ca1746801d Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:51 -0500 Subject: staging/hfi1: TID group definitions and support funcs Definitions and functions use to manage sets of TID/RcvArray groups. These will be used by the TID cacheline functionality coming with later patches. TID groups (or RcvArray groups) are groups of TID/RcvArray entries organized in sets of 8 and aligned on cacheline boundaries. The TID/RcvArray entries are managed in this way to make taking advantage of write-combining easier - each group is a entire cacheline. 
rcv_array_wc_fill() is provided to allow of generating writes to TIDs which are not currently being used in order to cause the flush of the write-combining buffer. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index bafeddf..7f15024 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -52,6 +52,14 @@ #include "user_exp_rcv.h" #include "trace.h" +struct tid_group { + struct list_head list; + unsigned base; + u8 size; + u8 used; + u8 map; +}; + struct mmu_rb_node { struct rb_node rbnode; unsigned long virt; @@ -75,6 +83,8 @@ static const char * const mmu_types[] = { "RANGE" }; +#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) + static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, unsigned long); static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *, @@ -94,6 +104,43 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *, struct mm_struct *, unsigned long, unsigned long); +static inline void exp_tid_group_init(struct exp_tid_set *set) +{ + INIT_LIST_HEAD(&set->list); + set->count = 0; +} + +static inline void tid_group_remove(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_del_init(&grp->list); + set->count--; +} + +static inline void tid_group_add_tail(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_add_tail(&grp->list, &set->list); + set->count++; +} + +static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) +{ + struct tid_group *grp = + list_first_entry(&set->list, struct tid_group, list); + list_del_init(&grp->list); + set->count--; + return grp; +} + +static inline void tid_group_move(struct tid_group *group, + struct exp_tid_set *s1, + struct exp_tid_set *s2) +{ + tid_group_remove(group, s1); + tid_group_add_tail(group, s2); +} + static struct mmu_notifier_ops __maybe_unused mn_opts = { .invalidate_page = mmu_notifier_page, .invalidate_range_start = mmu_notifier_range_start, @@ -114,6 +161,23 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) return -EINVAL; } +/* + * Write an "empty" RcvArray entry. + * This function exists so the TID registaration code can use it + * to write to unused/unneeded entries and still take advantage + * of the WC performance improvements. The HFI will ignore this + * write to the RcvArray entry. + */ +static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) +{ + /* + * Doing the WC fill writes only makes sense if the device is + * present and the RcvArray has been mapped as WC memory. + */ + if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) + writeq(0, dd->rcvarray_wc + (index * 8)); +} + int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) { return -EINVAL; -- cgit v0.10.2 From f88e0c8a139dc737b997876203885a3168c32e95 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:52 -0500 Subject: staging/hfi1: Add building blocks for TID caching Functions added by this patch are building blocks for the upcoming TID caching functionality. The functions added are currently unsed (and marked as such.) The functions' purposes are to find physically contigous pages in the user's virtual buffer, program the RcvArray group entries with these physical chunks, and unprogram the RcvArray groups. 
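To make the buffer-sizing rule applied by find_phys_blocks() easier to follow on its own, here is the same constraint restated as a stand-alone sketch (the helper name and the max_buffer parameter are illustrative, not from the patch): each physically contiguous run is capped at the largest supported buffer and otherwise rounded down to a power of two, because RcvArray entries only accept power-of-two buffer sizes.

    #include <linux/mm.h>
    #include <linux/log2.h>

    /* Sketch of the per-run sizing rule used when programming RcvArray
     * entries: cap at max_buffer, else round down to a power of two. */
    static unsigned int chunk_pages(unsigned int run_pages, u32 max_buffer)
    {
            u32 bytes = run_pages * PAGE_SIZE;

            if (bytes > max_buffer)
                    bytes = max_buffer;
            else if (!is_power_of_2(bytes))
                    bytes = rounddown_pow_of_two(bytes);

            return bytes >> PAGE_SHIFT;
    }
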
Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 7f15024..5a7e455 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -83,8 +83,20 @@ static const char * const mmu_types[] = { "RANGE" }; +struct tid_pageset { + u16 idx; + u16 count; +}; + #define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) +static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, + struct rb_root *) __maybe_unused; +static u32 find_phys_blocks(struct page **, unsigned, + struct tid_pageset *) __maybe_unused; +static int set_rcvarray_entry(struct file *, unsigned long, u32, + struct tid_group *, struct page **, + unsigned) __maybe_unused; static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, unsigned long); static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *, @@ -103,6 +115,21 @@ static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, static inline void mmu_notifier_range_start(struct mmu_notifier *, struct mm_struct *, unsigned long, unsigned long); +static int program_rcvarray(struct file *, unsigned long, struct tid_group *, + struct tid_pageset *, unsigned, u16, struct page **, + u32 *, unsigned *, unsigned *) __maybe_unused; +static int unprogram_rcvarray(struct file *, u32, + struct tid_group **) __maybe_unused; +static void clear_tid_node(struct hfi1_filedata *, u16, + struct mmu_rb_node *) __maybe_unused; + +static inline u32 rcventry2tidinfo(u32 rcventry) +{ + u32 pair = rcventry & ~0x1; + + return EXP_TID_SET(IDX, pair >> 1) | + EXP_TID_SET(CTRL, 1 << (rcventry - pair)); +} static inline void exp_tid_group_init(struct exp_tid_set *set) { @@ -193,6 +220,316 @@ int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) return -EINVAL; } +static u32 find_phys_blocks(struct page **pages, unsigned npages, + struct tid_pageset *list) +{ + unsigned pagecount, pageidx, setcount = 0, i; + unsigned long pfn, this_pfn; + + if (!npages) + return 0; + + /* + * Look for sets of physically contiguous pages in the user buffer. + * This will allow us to optimize Expected RcvArray entry usage by + * using the bigger supported sizes. + */ + pfn = page_to_pfn(pages[0]); + for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) { + this_pfn = i < npages ? page_to_pfn(pages[i]) : 0; + + /* + * If the pfn's are not sequential, pages are not physically + * contiguous. + */ + if (this_pfn != ++pfn) { + /* + * At this point we have to loop over the set of + * physically contiguous pages and break them down it + * sizes supported by the HW. + * There are two main constraints: + * 1. The max buffer size is MAX_EXPECTED_BUFFER. + * If the total set size is bigger than that + * program only a MAX_EXPECTED_BUFFER chunk. + * 2. The buffer size has to be a power of two. If + * it is not, round down to the closes power of + * 2 and program that size. 
+ */ + while (pagecount) { + int maxpages = pagecount; + u32 bufsize = pagecount * PAGE_SIZE; + + if (bufsize > MAX_EXPECTED_BUFFER) + maxpages = + MAX_EXPECTED_BUFFER >> + PAGE_SHIFT; + else if (!is_power_of_2(bufsize)) + maxpages = + rounddown_pow_of_two(bufsize) >> + PAGE_SHIFT; + + list[setcount].idx = pageidx; + list[setcount].count = maxpages; + pagecount -= maxpages; + pageidx += maxpages; + setcount++; + } + pageidx = i; + pagecount = 1; + pfn = this_pfn; + } else { + pagecount++; + } + } + return setcount; +} + +/** + * program_rcvarray() - program an RcvArray group with receive buffers + * @fp: file pointer + * @vaddr: starting user virtual address + * @grp: RcvArray group + * @sets: array of struct tid_pageset holding information on physically + * contiguous chunks from the user buffer + * @start: starting index into sets array + * @count: number of struct tid_pageset's to program + * @pages: an array of struct page * for the user buffer + * @tidlist: the array of u32 elements when the information about the + * programmed RcvArray entries is to be encoded. + * @tididx: starting offset into tidlist + * @pmapped: (output parameter) number of pages programmed into the RcvArray + * entries. + * + * This function will program up to 'count' number of RcvArray entries from the + * group 'grp'. To make best use of write-combining writes, the function will + * perform writes to the unused RcvArray entries which will be ignored by the + * HW. Each RcvArray entry will be programmed with a physically contiguous + * buffer chunk from the user's virtual buffer. + * + * Return: + * -EINVAL if the requested count is larger than the size of the group, + * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or + * number of RcvArray entries programmed. + */ +static int program_rcvarray(struct file *fp, unsigned long vaddr, + struct tid_group *grp, + struct tid_pageset *sets, + unsigned start, u16 count, struct page **pages, + u32 *tidlist, unsigned *tididx, unsigned *pmapped) +{ + struct hfi1_filedata *fd = fp->private_data; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + u16 idx; + u32 tidinfo = 0, rcventry, useidx = 0; + int mapped = 0; + + /* Count should never be larger than the group size */ + if (count > grp->size) + return -EINVAL; + + /* Find the first unused entry in the group */ + for (idx = 0; idx < grp->size; idx++) { + if (!(grp->map & (1 << idx))) { + useidx = idx; + break; + } + rcv_array_wc_fill(dd, grp->base + idx); + } + + idx = 0; + while (idx < count) { + u16 npages, pageidx, setidx = start + idx; + int ret = 0; + + /* + * If this entry in the group is used, move to the next one. + * If we go past the end of the group, exit the loop. 
+ */ + if (useidx >= grp->size) { + break; + } else if (grp->map & (1 << useidx)) { + rcv_array_wc_fill(dd, grp->base + useidx); + useidx++; + continue; + } + + rcventry = grp->base + useidx; + npages = sets[setidx].count; + pageidx = sets[setidx].idx; + + ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE), + rcventry, grp, pages + pageidx, + npages); + if (ret) + return ret; + mapped += npages; + + tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) | + EXP_TID_SET(LEN, npages); + tidlist[(*tididx)++] = tidinfo; + grp->used++; + grp->map |= 1 << useidx++; + idx++; + } + + /* Fill the rest of the group with "blank" writes */ + for (; useidx < grp->size; useidx++) + rcv_array_wc_fill(dd, grp->base + useidx); + *pmapped = mapped; + return idx; +} + +static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, + u32 rcventry, struct tid_group *grp, + struct page **pages, unsigned npages) +{ + int ret; + struct hfi1_filedata *fd = fp->private_data; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct mmu_rb_node *node; + struct hfi1_devdata *dd = uctxt->dd; + struct rb_root *root = &fd->tid_rb_root; + dma_addr_t phys; + + /* + * Allocate the node first so we can handle a potential + * failure before we've programmed anything. + */ + node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages), + GFP_KERNEL); + if (!node) + return -ENOMEM; + + phys = pci_map_single(dd->pcidev, + __va(page_to_phys(pages[0])), + npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&dd->pcidev->dev, phys)) { + dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n", + phys); + kfree(node); + return -EFAULT; + } + + node->virt = vaddr; + node->phys = page_to_phys(pages[0]); + node->len = npages * PAGE_SIZE; + node->npages = npages; + node->rcventry = rcventry; + node->dma_addr = phys; + node->grp = grp; + node->freed = false; + memcpy(node->pages, pages, sizeof(struct page *) * npages); + + spin_lock(&fd->rb_lock); + ret = fd->mmu_rb_insert(root, node); + spin_unlock(&fd->rb_lock); + + if (ret) { + hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", + node->rcventry, node->virt, node->phys, ret); + pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, + PCI_DMA_FROMDEVICE); + kfree(node); + return -EFAULT; + } + hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); + return 0; +} + +static int unprogram_rcvarray(struct file *fp, u32 tidinfo, + struct tid_group **grp) +{ + struct hfi1_filedata *fd = fp->private_data; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + struct mmu_rb_node *node; + u8 tidctrl = EXP_TID_GET(tidinfo, CTRL); + u32 tidbase = uctxt->expected_base, + tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry; + + if (tididx >= uctxt->expected_count) { + dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n", + tididx, uctxt->ctxt); + return -EINVAL; + } + + if (tidctrl == 0x3) + return -EINVAL; + + rcventry = tidbase + tididx + (tidctrl - 1); + + spin_lock(&fd->rb_lock); + node = mmu_rb_search_by_entry(&fd->tid_rb_root, rcventry); + if (!node) { + spin_unlock(&fd->rb_lock); + return -EBADF; + } + rb_erase(&node->rbnode, &fd->tid_rb_root); + spin_unlock(&fd->rb_lock); + if (grp) + *grp = node->grp; + clear_tid_node(fd, fd->subctxt, node); + return 0; +} + +static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, + struct mmu_rb_node *node) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + + hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0); + /* + * 
Make sure device has seen the write before we unpin the + * pages. + */ + flush_wc(); + + pci_unmap_single(dd->pcidev, node->dma_addr, node->len, + PCI_DMA_FROMDEVICE); + hfi1_release_user_pages(node->pages, node->npages, true); + + node->grp->used--; + node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); + + if (node->grp->used == node->grp->size - 1) + tid_group_move(node->grp, &uctxt->tid_full_list, + &uctxt->tid_used_list); + else if (!node->grp->used) + tid_group_move(node->grp, &uctxt->tid_used_list, + &uctxt->tid_group_list); + kfree(node); +} + +static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, + struct exp_tid_set *set, struct rb_root *root) +{ + struct tid_group *grp, *ptr; + struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata, + tid_rb_root); + int i; + + list_for_each_entry_safe(grp, ptr, &set->list, list) { + list_del_init(&grp->list); + + spin_lock(&fd->rb_lock); + for (i = 0; i < grp->size; i++) { + if (grp->map & (1 << i)) { + u16 rcventry = grp->base + i; + struct mmu_rb_node *node; + + node = mmu_rb_search_by_entry(root, rcventry); + if (!node) + continue; + rb_erase(&node->rbnode, root); + clear_tid_node(fd, -1, node); + } + } + spin_unlock(&fd->rb_lock); + } +} + static inline void mmu_notifier_page(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long addr) { -- cgit v0.10.2 From 463e6ebc86578ef3ff5bb500f6fc9449afaeea7e Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:53 -0500 Subject: staging/hfi1: Convert lock to mutex The exp_lock lock does not need to be a spinlock as all its uses are in process context and allowing the process to sleep when the mutex is contended might be beneficial. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 76fe603..b034826 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1611,14 +1611,14 @@ static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo) * reserved, we don't need the lock anymore since we * are guaranteed the groups. */ - spin_lock(&uctxt->exp_lock); + mutex_lock(&uctxt->exp_lock); if (uctxt->tidusemap[useidx] == -1ULL || bitidx >= BITS_PER_LONG) { /* no free groups in the set, use the next */ useidx = (useidx + 1) % uctxt->tidmapcnt; idx++; bitidx = 0; - spin_unlock(&uctxt->exp_lock); + mutex_unlock(&uctxt->exp_lock); continue; } ngroups = ((npages - mapped) / dd->rcv_entries.group_size) + @@ -1635,13 +1635,13 @@ static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo) * as 0 because we don't check the entire bitmap but * we start from bitidx. */ - spin_unlock(&uctxt->exp_lock); + mutex_unlock(&uctxt->exp_lock); continue; } bits_used = min(free, ngroups); tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx; uctxt->tidusemap[useidx] |= tidmap[useidx]; - spin_unlock(&uctxt->exp_lock); + mutex_unlock(&uctxt->exp_lock); /* * At this point, we know where in the map we have free bits. @@ -1677,10 +1677,10 @@ static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo) * Let go of the bits that we reserved since we are not * going to use them. 
*/ - spin_lock(&uctxt->exp_lock); + mutex_lock(&uctxt->exp_lock); uctxt->tidusemap[useidx] &= ~(((1ULL << bits_used) - 1) << bitidx); - spin_unlock(&uctxt->exp_lock); + mutex_unlock(&uctxt->exp_lock); goto done; } /* diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 51ecf45..53f464c 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -258,7 +258,7 @@ struct hfi1_ctxtdata { struct exp_tid_set tid_full_list; /* lock protecting all Expected TID data */ - spinlock_t exp_lock; + struct mutex exp_lock; /* number of pio bufs for this ctxt (all procs, if shared) */ u32 piocnt; /* first pio buffer for this ctxt */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 4dd8051..72c5143 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -227,7 +227,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt) rcd->numa_id = numa_node_id(); rcd->rcv_array_groups = dd->rcv_entries.ngroups; - spin_lock_init(&rcd->exp_lock); + mutex_init(&rcd->exp_lock); /* * Calculate the context's RcvArray entry starting point. -- cgit v0.10.2 From 3abb33ac652135da9c3c36d9def73ede67e4ba03 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:54 -0500 Subject: staging/hfi1: Add TID cache receive init and free funcs The upcoming TID caching feature requires different data structures and, by extension, different initialization for each of the MPI processes. The two new functions (currently unused) perform the required initialization and freeing of required resources and structures. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 5a7e455..843023e 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -90,23 +90,25 @@ struct tid_pageset { #define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) +#define num_user_pages(vaddr, len) \ + (1 + (((((unsigned long)(vaddr) + \ + (unsigned long)(len) - 1) & PAGE_MASK) - \ + ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) + static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, - struct rb_root *) __maybe_unused; + struct rb_root *); static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *) __maybe_unused; static int set_rcvarray_entry(struct file *, unsigned long, u32, - struct tid_group *, struct page **, - unsigned) __maybe_unused; + struct tid_group *, struct page **, unsigned); static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, unsigned long); static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *, unsigned long) __maybe_unused; static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *, u32); -static int mmu_rb_insert_by_addr(struct rb_root *, - struct mmu_rb_node *) __maybe_unused; -static int mmu_rb_insert_by_entry(struct rb_root *, - struct mmu_rb_node *) __maybe_unused; +static int mmu_rb_insert_by_addr(struct rb_root *, struct mmu_rb_node *); +static int mmu_rb_insert_by_entry(struct rb_root *, struct mmu_rb_node *); static void mmu_notifier_mem_invalidate(struct mmu_notifier *, unsigned long, unsigned long, enum mmu_call_types); @@ -168,7 +170,7 @@ static inline void tid_group_move(struct tid_group *group, tid_group_add_tail(group, s2); } -static struct mmu_notifier_ops __maybe_unused mn_opts = { +static struct 
mmu_notifier_ops mn_opts = { .invalidate_page = mmu_notifier_page, .invalidate_range_start = mmu_notifier_range_start, }; @@ -180,12 +182,144 @@ static struct mmu_notifier_ops __maybe_unused mn_opts = { */ int hfi1_user_exp_rcv_init(struct file *fp) { - return -EINVAL; + struct hfi1_filedata *fd = fp->private_data; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + unsigned tidbase; + int i, ret = 0; + + INIT_HLIST_NODE(&fd->mn.hlist); + spin_lock_init(&fd->rb_lock); + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->mn.ops = &mn_opts; + fd->tid_rb_root = RB_ROOT; + + if (!uctxt->subctxt_cnt || !fd->subctxt) { + exp_tid_group_init(&uctxt->tid_group_list); + exp_tid_group_init(&uctxt->tid_used_list); + exp_tid_group_init(&uctxt->tid_full_list); + + tidbase = uctxt->expected_base; + for (i = 0; i < uctxt->expected_count / + dd->rcv_entries.group_size; i++) { + struct tid_group *grp; + + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) { + /* + * If we fail here, the groups already + * allocated will be freed by the close + * call. + */ + ret = -ENOMEM; + goto done; + } + grp->size = dd->rcv_entries.group_size; + grp->base = tidbase; + tid_group_add_tail(grp, &uctxt->tid_group_list); + tidbase += dd->rcv_entries.group_size; + } + } + + if (!HFI1_CAP_IS_USET(TID_UNMAP)) { + fd->invalid_tid_idx = 0; + fd->invalid_tids = kzalloc(uctxt->expected_count * + sizeof(u32), GFP_KERNEL); + if (!fd->invalid_tids) { + ret = -ENOMEM; + goto done; + } else { + /* + * Register MMU notifier callbacks. If the registration + * fails, continue but turn off the TID caching for + * all user contexts. + */ + ret = mmu_notifier_register(&fd->mn, current->mm); + if (ret) { + dd_dev_info(dd, + "Failed MMU notifier registration %d\n", + ret); + HFI1_CAP_USET(TID_UNMAP); + ret = 0; + } + } + } + + if (HFI1_CAP_IS_USET(TID_UNMAP)) + fd->mmu_rb_insert = mmu_rb_insert_by_entry; + else + fd->mmu_rb_insert = mmu_rb_insert_by_addr; + + /* + * PSM does not have a good way to separate, count, and + * effectively enforce a limit on RcvArray entries used by + * subctxts (when context sharing is used) when TID caching + * is enabled. To help with that, we calculate a per-process + * RcvArray entry share and enforce that. + * If TID caching is not in use, PSM deals with usage on its + * own. In that case, we allow any subctxt to take all of the + * entries. + * + * Make sure that we set the tid counts only after successful + * init. + */ + if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) { + u16 remainder; + + fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; + remainder = uctxt->expected_count % uctxt->subctxt_cnt; + if (remainder && fd->subctxt < remainder) + fd->tid_limit++; + } else { + fd->tid_limit = uctxt->expected_count; + } +done: + return ret; } int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { - return -EINVAL; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct tid_group *grp, *gptr; + + /* + * The notifier would have been removed when the process'es mm + * was freed. 
+ */ + if (current->mm && !HFI1_CAP_IS_USET(TID_UNMAP)) + mmu_notifier_unregister(&fd->mn, current->mm); + + kfree(fd->invalid_tids); + + if (!uctxt->cnt) { + if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) + unlock_exp_tids(uctxt, &uctxt->tid_full_list, + &fd->tid_rb_root); + if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) + unlock_exp_tids(uctxt, &uctxt->tid_used_list, + &fd->tid_rb_root); + list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, + list) { + list_del_init(&grp->list); + kfree(grp); + } + spin_lock(&fd->rb_lock); + if (!RB_EMPTY_ROOT(&fd->tid_rb_root)) { + struct rb_node *node; + struct mmu_rb_node *rbnode; + + while ((node = rb_first(&fd->tid_rb_root))) { + rbnode = rb_entry(node, struct mmu_rb_node, + rbnode); + rb_erase(&rbnode->rbnode, &fd->tid_rb_root); + kfree(rbnode); + } + } + spin_unlock(&fd->rb_lock); + hfi1_clear_tids(uctxt); + } + return 0; } /* -- cgit v0.10.2 From b5eb3b2ffd1bf5be17df08565f4ab56c3fdae43e Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:55 -0500 Subject: staging/hfi1: Add MMU notifier callback function TID caching will rely on the MMU notifier to be told when memory is being invalidated. When the callback is called, the driver will find all RcvArray entries that span the invalidated buffer and "schedule" them to be freed by the PSM library. This function is currently unused and is being added in preparation for the TID caching feature. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 843023e..1787c55 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -104,7 +104,7 @@ static int set_rcvarray_entry(struct file *, unsigned long, u32, static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, unsigned long); static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *, - unsigned long) __maybe_unused; + unsigned long); static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *, u32); static int mmu_rb_insert_by_addr(struct rb_root *, struct mmu_rb_node *); @@ -683,7 +683,70 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, unsigned long start, unsigned long end, enum mmu_call_types type) { - /* Stub for now */ + struct hfi1_filedata *fd = container_of(mn, struct hfi1_filedata, mn); + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct rb_root *root = &fd->tid_rb_root; + struct mmu_rb_node *node; + unsigned long addr = start; + + spin_lock(&fd->rb_lock); + while (addr < end) { + node = mmu_rb_search_by_addr(root, addr); + + if (!node) { + /* + * Didn't find a node at this address. However, the + * range could be bigger than what we have registered + * so we have to keep looking. + */ + addr += PAGE_SIZE; + continue; + } + + /* + * The next address to be looked up is computed based + * on the node's starting address. This is due to the + * fact that the range where we start might be in the + * middle of the node's buffer so simply incrementing + * the address by the node's size would result is a + * bad address. 
+ */ + addr = node->virt + (node->npages * PAGE_SIZE); + if (node->freed) + continue; + + node->freed = true; + + spin_lock(&fd->invalid_lock); + if (fd->invalid_tid_idx < uctxt->expected_count) { + fd->invalid_tids[fd->invalid_tid_idx] = + rcventry2tidinfo(node->rcventry - + uctxt->expected_base); + fd->invalid_tids[fd->invalid_tid_idx] |= + EXP_TID_SET(LEN, node->npages); + if (!fd->invalid_tid_idx) { + unsigned long *ev; + + /* + * hfi1_set_uevent_bits() sets a user event flag + * for all processes. Because calling into the + * driver to process TID cache invalidations is + * expensive and TID cache invalidations are + * handled on a per-process basis, we can + * optimize this to set the flag only for the + * process in question. + */ + ev = uctxt->dd->events + + (((uctxt->ctxt - + uctxt->dd->first_user_ctxt) * + HFI1_MAX_SHARED_CTXTS) + fd->subctxt); + set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); + } + fd->invalid_tid_idx++; + } + spin_unlock(&fd->invalid_lock); + } + spin_unlock(&fd->rb_lock); } static inline int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr, -- cgit v0.10.2 From 455d7f1ab86b7b1703898c75c4bc01df869da4a6 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:56 -0500 Subject: staging/hfi1: Add TID free/clear function bodies Up to now, the functions which cleared the programmed TID entries and gave PSM the list of invalidated TID entries were just stubs. With this commit, the bodies of these functions are added. This commit is a bit asymmetric as it only contains the free code path. This is done on purpose to help with patch reviews as the programming code path is much longer. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 1787c55..776ce00 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -120,10 +120,8 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *, static int program_rcvarray(struct file *, unsigned long, struct tid_group *, struct tid_pageset *, unsigned, u16, struct page **, u32 *, unsigned *, unsigned *) __maybe_unused; -static int unprogram_rcvarray(struct file *, u32, - struct tid_group **) __maybe_unused; -static void clear_tid_node(struct hfi1_filedata *, u16, - struct mmu_rb_node *) __maybe_unused; +static int unprogram_rcvarray(struct file *, u32, struct tid_group **); +static void clear_tid_node(struct hfi1_filedata *, u16, struct mmu_rb_node *); static inline u32 rcventry2tidinfo(u32 rcventry) { @@ -264,6 +262,7 @@ int hfi1_user_exp_rcv_init(struct file *fp) * Make sure that we set the tid counts only after successful * init. 
*/ + spin_lock(&fd->tid_lock); if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) { u16 remainder; @@ -274,6 +273,7 @@ int hfi1_user_exp_rcv_init(struct file *fp) } else { fd->tid_limit = uctxt->expected_count; } + spin_unlock(&fd->tid_lock); done: return ret; } @@ -346,12 +346,91 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) { - return -EINVAL; + int ret = 0; + struct hfi1_filedata *fd = fp->private_data; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + u32 *tidinfo; + unsigned tididx; + + tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL); + if (!tidinfo) + return -ENOMEM; + + if (copy_from_user(tidinfo, (void __user *)(unsigned long) + tinfo->tidlist, sizeof(tidinfo[0]) * + tinfo->tidcnt)) { + ret = -EFAULT; + goto done; + } + + mutex_lock(&uctxt->exp_lock); + for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { + ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL); + if (ret) { + hfi1_cdbg(TID, "Failed to unprogram rcv array %d", + ret); + break; + } + } + spin_lock(&fd->tid_lock); + fd->tid_used -= tididx; + spin_unlock(&fd->tid_lock); + tinfo->tidcnt = tididx; + mutex_unlock(&uctxt->exp_lock); +done: + kfree(tidinfo); + return ret; } int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) { - return -EINVAL; + struct hfi1_filedata *fd = fp->private_data; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + unsigned long *ev = uctxt->dd->events + + (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * + HFI1_MAX_SHARED_CTXTS) + fd->subctxt); + u32 *array; + int ret = 0; + + if (!fd->invalid_tids) + return -EINVAL; + + /* + * copy_to_user() can sleep, which will leave the invalid_lock + * locked and cause the MMU notifier to be blocked on the lock + * for a long time. + * Copy the data to a local buffer so we can release the lock. + */ + array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL); + if (!array) + return -EFAULT; + + spin_lock(&fd->invalid_lock); + if (fd->invalid_tid_idx) { + memcpy(array, fd->invalid_tids, sizeof(*array) * + fd->invalid_tid_idx); + memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) * + fd->invalid_tid_idx); + tinfo->tidcnt = fd->invalid_tid_idx; + fd->invalid_tid_idx = 0; + /* + * Reset the user flag while still holding the lock. + * Otherwise, PSM can miss events. + */ + clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); + } else { + tinfo->tidcnt = 0; + } + spin_unlock(&fd->invalid_lock); + + if (tinfo->tidcnt) { + if (copy_to_user((void __user *)tinfo->tidlist, + array, sizeof(*array) * tinfo->tidcnt)) + ret = -EFAULT; + } + kfree(array); + + return ret; } static u32 find_phys_blocks(struct page **pages, unsigned npages, -- cgit v0.10.2 From 7e7a436ecb6e703a232df0613b5f24accbe3d7d2 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:57 -0500 Subject: staging/hfi1: Add TID entry program function body The previous patch in the series added the free/invalidate function bodies. Now, it's time for the programming side. This large function takes the user's buffer, breaks it up into manageable chunks, allocates enough RcvArray groups and programs the chunks into the RcvArray entries in the hardware. With this function, the TID caching functionality is implemented. However, it is still unused. The switch will come in a later patch in the series, which will remove the old functionality and switch the driver over to TID caching. 
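As a rough worked example of the group accounting this function performs (all numbers assumed for illustration, using the usual group size of 8 RcvArray entries): a buffer that breaks down into 20 page sets yields 20 / 8 = 2 complete groups, which are popped from tid_group_list, programmed, and moved to tid_full_list, covering 16 page sets; the remaining 4 page sets are programmed into a group moved onto tid_used_list, and the 4 entries left unused in that group receive the "blank" rcv_array_wc_fill() writes so the write-combining buffer still gets flushed.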
Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 776ce00..d33f579 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -97,8 +97,7 @@ struct tid_pageset { static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, struct rb_root *); -static u32 find_phys_blocks(struct page **, unsigned, - struct tid_pageset *) __maybe_unused; +static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); static int set_rcvarray_entry(struct file *, unsigned long, u32, struct tid_group *, struct page **, unsigned); static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, @@ -119,7 +118,7 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *, unsigned long, unsigned long); static int program_rcvarray(struct file *, unsigned long, struct tid_group *, struct tid_pageset *, unsigned, u16, struct page **, - u32 *, unsigned *, unsigned *) __maybe_unused; + u32 *, unsigned *, unsigned *); static int unprogram_rcvarray(struct file *, u32, struct tid_group **); static void clear_tid_node(struct hfi1_filedata *, u16, struct mmu_rb_node *); @@ -339,9 +338,265 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) writeq(0, dd->rcvarray_wc + (index * 8)); } +/* + * RcvArray entry allocation for Expected Receives is done by the + * following algorithm: + * + * The context keeps 3 lists of groups of RcvArray entries: + * 1. List of empty groups - tid_group_list + * This list is created during user context creation and + * contains elements which describe sets (of 8) of empty + * RcvArray entries. + * 2. List of partially used groups - tid_used_list + * This list contains sets of RcvArray entries which are + * not completely used up. Another mapping request could + * use some or all of the remaining entries. + * 3. List of full groups - tid_full_list + * This is the list where sets that are completely used + * up go. + * + * An attempt to optimize the usage of RcvArray entries is + * made by finding all sets of physically contiguous pages in a + * user's buffer. + * These physically contiguous sets are further split into + * sizes supported by the receive engine of the HFI. The + * resulting sets of pages are stored in struct tid_pageset, + * which describes the sets as: + * * .count - number of pages in this set + * * .idx - starting index into struct page ** array + * of this set + * + * From this point on, the algorithm deals with the page sets + * described above. The number of pagesets is divided by the + * RcvArray group size to produce the number of full groups + * needed. + * + * Groups from the 3 lists are manipulated using the following + * rules: + * 1. For each set of 8 pagesets, a complete group from + * tid_group_list is taken, programmed, and moved to + * the tid_full_list. + * 2. For all remaining pagesets: + * 2.1 If the tid_used_list is empty and the tid_group_list + * is empty, stop processing pagesets and return only + * what has been programmed up to this point. + * 2.2 If the tid_used_list is empty and the tid_group_list + * is not empty, move a group from tid_group_list to + * tid_used_list. + * 2.3 For each group in tid_used_list, program as much as + * can fit into the group. If the group becomes fully + * used, move it to tid_full_list.
+ */ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) { - return -EINVAL; + int ret = 0, need_group = 0, pinned; + struct hfi1_filedata *fd = fp->private_data; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets, + tididx = 0, mapped, mapped_pages = 0; + unsigned long vaddr = tinfo->vaddr; + struct page **pages = NULL; + u32 *tidlist = NULL; + struct tid_pageset *pagesets = NULL; + + /* Get the number of pages the user buffer spans */ + npages = num_user_pages(vaddr, tinfo->length); + if (!npages) + return -EINVAL; + + if (npages > uctxt->expected_count) { + dd_dev_err(dd, "Expected buffer too big\n"); + return -EINVAL; + } + + /* Verify that access is OK for the user buffer */ + if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, + npages * PAGE_SIZE)) { + dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", + (void *)vaddr, npages); + return -EFAULT; + } + + pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets), + GFP_KERNEL); + if (!pagesets) + return -ENOMEM; + + /* Allocate the array of struct page pointers needed for pinning */ + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + ret = -ENOMEM; + goto bail; + } + + /* + * Pin all the pages of the user buffer. If we can't pin all the + * pages, accept the amount pinned so far and program only that. + * User space knows how to deal with partially programmed buffers. + */ + pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); + if (pinned <= 0) { + ret = pinned; + goto bail; + } + + /* Find sets of physically contiguous pages */ + npagesets = find_phys_blocks(pages, pinned, pagesets); + + /* + * We don't need to access this under a lock since tid_used is per + * process and the same process cannot be in hfi1_user_exp_rcv_clear() + * and hfi1_user_exp_rcv_setup() at the same time. + */ + spin_lock(&fd->tid_lock); + if (fd->tid_used + npagesets > fd->tid_limit) + pageset_count = fd->tid_limit - fd->tid_used; + else + pageset_count = npagesets; + spin_unlock(&fd->tid_lock); + + if (!pageset_count) + goto bail; + + ngroups = pageset_count / dd->rcv_entries.group_size; + tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); + if (!tidlist) { + ret = -ENOMEM; + goto nomem; + } + + tididx = 0; + + /* + * From this point on, we are going to be using shared (between master + * and subcontexts) context resources. We need to take the lock. + */ + mutex_lock(&uctxt->exp_lock); + /* + * The first step is to program the RcvArray entries which are complete + * groups. + */ + while (ngroups && uctxt->tid_group_list.count) { + struct tid_group *grp = + tid_group_pop(&uctxt->tid_group_list); + + ret = program_rcvarray(fp, vaddr, grp, pagesets, + pageidx, dd->rcv_entries.group_size, + pages, tidlist, &tididx, &mapped); + /* + * If there was a failure to program the RcvArray + * entries for the entire group, reset the grp fields + * and add the grp back to the free group list. + */ + if (ret <= 0) { + tid_group_add_tail(grp, &uctxt->tid_group_list); + hfi1_cdbg(TID, + "Failed to program RcvArray group %d", ret); + goto unlock; + } + + tid_group_add_tail(grp, &uctxt->tid_full_list); + ngroups--; + pageidx += ret; + mapped_pages += mapped; + } + + while (pageidx < pageset_count) { + struct tid_group *grp, *ptr; + /* + * If we don't have any partially used tid groups, check + * if we have empty groups. If so, take one from there and + * put in the partially used list. 
+ */ + if (!uctxt->tid_used_list.count || need_group) { + if (!uctxt->tid_group_list.count) + goto unlock; + + grp = tid_group_pop(&uctxt->tid_group_list); + tid_group_add_tail(grp, &uctxt->tid_used_list); + need_group = 0; + } + /* + * There is an optimization opportunity here - instead of + * fitting as many page sets as we can, check for a group + * later on in the list that could fit all of them. + */ + list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list, + list) { + unsigned use = min_t(unsigned, pageset_count - pageidx, + grp->size - grp->used); + + ret = program_rcvarray(fp, vaddr, grp, pagesets, + pageidx, use, pages, tidlist, + &tididx, &mapped); + if (ret < 0) { + hfi1_cdbg(TID, + "Failed to program RcvArray entries %d", + ret); + ret = -EFAULT; + goto unlock; + } else if (ret > 0) { + if (grp->used == grp->size) + tid_group_move(grp, + &uctxt->tid_used_list, + &uctxt->tid_full_list); + pageidx += ret; + mapped_pages += mapped; + need_group = 0; + /* Check if we are done so we break out early */ + if (pageidx >= pageset_count) + break; + } else if (WARN_ON(ret == 0)) { + /* + * If ret is 0, we did not program any entries + * into this group, which can only happen if + * we've screwed up the accounting somewhere. + * Warn and try to continue. + */ + need_group = 1; + } + } + } +unlock: + mutex_unlock(&uctxt->exp_lock); +nomem: + hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, + mapped_pages, ret); + if (tididx) { + spin_lock(&fd->tid_lock); + fd->tid_used += tididx; + spin_unlock(&fd->tid_lock); + tinfo->tidcnt = tididx; + tinfo->length = mapped_pages * PAGE_SIZE; + + if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist, + tidlist, sizeof(tidlist[0]) * tididx)) { + /* + * On failure to copy to the user level, we need to undo + * everything done so far so we don't leak resources. + */ + tinfo->tidlist = (unsigned long)&tidlist; + hfi1_user_exp_rcv_clear(fp, tinfo); + tinfo->tidlist = 0; + ret = -EFAULT; + goto bail; + } + } + + /* + * If not everything was mapped (due to insufficient RcvArray entries, + * for example), unpin all unmapped pages so we can pin them nex time. + */ + if (mapped_pages != pinned) + hfi1_release_user_pages(&pages[mapped_pages], + pinned - mapped_pages, + false); +bail: + kfree(pagesets); + kfree(pages); + kfree(tidlist); + return ret > 0 ? 0 : ret; } int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) -- cgit v0.10.2 From 0b091fb32c5ae4737bf606a313e6625dad34bbc6 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:58 -0500 Subject: staging/hfi1: Enable TID caching feature This commit "flips the switch" on the TID caching feature implemented in this patch series. As well as enabling the new feature by tying the new function with the PSM API, it also cleans up the old unneeded code, data structure members, and variables. Due to difference in operation and information, the tracing functions related to expected receives had to be changed. This patch include these changes. The tracing function changes could not be split into a separate commit without including both tracing variants at the same time. This would have caused other complications and ugliness. 
Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index b034826..d365889 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -96,9 +96,6 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); static int vma_fault(struct vm_area_struct *, struct vm_fault *); -static int exp_tid_setup(struct file *, struct hfi1_tid_info *); -static int exp_tid_free(struct file *, struct hfi1_tid_info *); -static void unlock_exp_tids(struct hfi1_ctxtdata *); static const struct file_operations hfi1_file_ops = { .owner = THIS_MODULE, @@ -188,6 +185,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, struct hfi1_cmd cmd; struct hfi1_user_info uinfo; struct hfi1_tid_info tinfo; + unsigned long addr; ssize_t consumed = 0, copy = 0, ret = 0; void *dest = NULL; __u64 user_val = 0; @@ -219,6 +217,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, break; case HFI1_CMD_TID_UPDATE: case HFI1_CMD_TID_FREE: + case HFI1_CMD_TID_INVAL_READ: copy = sizeof(tinfo); dest = &tinfo; break; @@ -241,7 +240,6 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, must_be_root = 1; /* validate user */ copy = 0; break; - case HFI1_CMD_TID_INVAL_READ: default: ret = -EINVAL; goto bail; @@ -295,9 +293,8 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, sc_return_credits(uctxt->sc); break; case HFI1_CMD_TID_UPDATE: - ret = exp_tid_setup(fp, &tinfo); + ret = hfi1_user_exp_rcv_setup(fp, &tinfo); if (!ret) { - unsigned long addr; /* * Copy the number of tidlist entries we used * and the length of the buffer we registered. @@ -312,8 +309,25 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, ret = -EFAULT; } break; + case HFI1_CMD_TID_INVAL_READ: + ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); + if (ret) + break; + addr = (unsigned long)cmd.addr + + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + ret = -EFAULT; + break; case HFI1_CMD_TID_FREE: - ret = exp_tid_free(fp, &tinfo); + ret = hfi1_user_exp_rcv_clear(fp, &tinfo); + if (ret) + break; + addr = (unsigned long)cmd.addr + + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + ret = -EFAULT; break; case HFI1_CMD_RECV_CTRL: ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val); @@ -779,12 +793,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) uctxt->pionowait = 0; uctxt->event_flags = 0; - hfi1_clear_tids(uctxt); + hfi1_user_exp_rcv_free(fdata); hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); - if (uctxt->tid_pg_list) - unlock_exp_tids(uctxt); - hfi1_stats.sps_ctxts--; dd->freectxts++; mutex_unlock(&hfi1_mutex); @@ -1107,7 +1118,7 @@ static int user_init(struct file *fp) ret = wait_event_interruptible(uctxt->wait, !test_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags)); - goto done; + goto expected; } /* initialize poll variables... */ @@ -1154,8 +1165,18 @@ static int user_init(struct file *fp) clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); wake_up(&uctxt->wait); } - ret = 0; +expected: + /* + * Expected receive has to be setup for all processes (including + * shared contexts). 
However, it has to be done after the master + * context has been fully configured as it depends on the + * eager/expected split of the RcvArray entries. + * Setting it up here ensures that the subcontexts will be waiting + * (due to the above wait_event_interruptible() until the master + * is setup. + */ + ret = hfi1_user_exp_rcv_init(fp); done: return ret; } @@ -1225,46 +1246,6 @@ static int setup_ctxt(struct file *fp) if (ret) goto done; } - /* Setup Expected Rcv memories */ - uctxt->tid_pg_list = vzalloc(uctxt->expected_count * - sizeof(struct page **)); - if (!uctxt->tid_pg_list) { - ret = -ENOMEM; - goto done; - } - uctxt->physshadow = vzalloc(uctxt->expected_count * - sizeof(*uctxt->physshadow)); - if (!uctxt->physshadow) { - ret = -ENOMEM; - goto done; - } - /* allocate expected TID map and initialize the cursor */ - atomic_set(&uctxt->tidcursor, 0); - uctxt->numtidgroups = uctxt->expected_count / - dd->rcv_entries.group_size; - uctxt->tidmapcnt = uctxt->numtidgroups / BITS_PER_LONG + - !!(uctxt->numtidgroups % BITS_PER_LONG); - uctxt->tidusemap = kzalloc_node(uctxt->tidmapcnt * - sizeof(*uctxt->tidusemap), - GFP_KERNEL, uctxt->numa_id); - if (!uctxt->tidusemap) { - ret = -ENOMEM; - goto done; - } - /* - * In case that the number of groups is not a multiple of - * 64 (the number of groups in a tidusemap element), mark - * the extra ones as used. This will effectively make them - * permanently used and should never be assigned. Otherwise, - * the code which checks how many free groups we have will - * get completely confused about the state of the bits. - */ - if (uctxt->numtidgroups % BITS_PER_LONG) - uctxt->tidusemap[uctxt->tidmapcnt - 1] = - ~((1ULL << (uctxt->numtidgroups % - BITS_PER_LONG)) - 1); - trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, - uctxt->tidusemap, uctxt->tidmapcnt); } ret = hfi1_user_sdma_alloc_queues(uctxt, fp); if (ret) @@ -1503,367 +1484,6 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, return 0; } -#define num_user_pages(vaddr, len) \ - (1 + (((((unsigned long)(vaddr) + \ - (unsigned long)(len) - 1) & PAGE_MASK) - \ - ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) - -/** - * tzcnt - count the number of trailing zeros in a 64bit value - * @value: the value to be examined - * - * Returns the number of trailing least significant zeros in the - * the input value. If the value is zero, return the number of - * bits of the value. - */ -static inline u8 tzcnt(u64 value) -{ - return value ? __builtin_ctzl(value) : sizeof(value) * 8; -} - -static inline unsigned num_free_groups(unsigned long map, u16 *start) -{ - unsigned free; - u16 bitidx = *start; - - if (bitidx >= BITS_PER_LONG) - return 0; - /* "Turn off" any bits set before our bit index */ - map &= ~((1ULL << bitidx) - 1); - free = tzcnt(map) - bitidx; - while (!free && bitidx < BITS_PER_LONG) { - /* Zero out the last set bit so we look at the rest */ - map &= ~(1ULL << bitidx); - /* - * Account for the previously checked bits and advance - * the bit index. We don't have to check for bitidx - * getting bigger than BITS_PER_LONG here as it would - * mean extra instructions that we don't need. If it - * did happen, it would push free to a negative value - * which will break the loop. 
- */ - free = tzcnt(map) - ++bitidx; - } - *start = bitidx; - return free; -} - -static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo) -{ - int ret = 0; - struct hfi1_filedata *fd = fp->private_data; - struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_devdata *dd = uctxt->dd; - unsigned tid, mapped = 0, npages, ngroups, exp_groups, - tidpairs = uctxt->expected_count / 2; - struct page **pages; - unsigned long vaddr, tidmap[uctxt->tidmapcnt]; - dma_addr_t *phys; - u32 tidlist[tidpairs], pairidx = 0, tidcursor; - u16 useidx, idx, bitidx, tidcnt = 0; - - vaddr = tinfo->vaddr; - - if (offset_in_page(vaddr)) { - ret = -EINVAL; - goto bail; - } - - npages = num_user_pages(vaddr, tinfo->length); - if (!npages) { - ret = -EINVAL; - goto bail; - } - if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, - npages * PAGE_SIZE)) { - dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", - (void *)vaddr, npages); - ret = -EFAULT; - goto bail; - } - - memset(tidmap, 0, sizeof(tidmap[0]) * uctxt->tidmapcnt); - memset(tidlist, 0, sizeof(tidlist[0]) * tidpairs); - - exp_groups = uctxt->expected_count / dd->rcv_entries.group_size; - /* which group set do we look at first? */ - tidcursor = atomic_read(&uctxt->tidcursor); - useidx = (tidcursor >> 16) & 0xffff; - bitidx = tidcursor & 0xffff; - - /* - * Keep going until we've mapped all pages or we've exhausted all - * RcvArray entries. - * This iterates over the number of tidmaps + 1 - * (idx <= uctxt->tidmapcnt) so we check the bitmap which we - * started from one more time for any free bits before the - * starting point bit. - */ - for (mapped = 0, idx = 0; - mapped < npages && idx <= uctxt->tidmapcnt;) { - u64 i, offset = 0; - unsigned free, pinned, pmapped = 0, bits_used; - u16 grp; - - /* - * "Reserve" the needed group bits under lock so other - * processes can't step in the middle of it. Once - * reserved, we don't need the lock anymore since we - * are guaranteed the groups. - */ - mutex_lock(&uctxt->exp_lock); - if (uctxt->tidusemap[useidx] == -1ULL || - bitidx >= BITS_PER_LONG) { - /* no free groups in the set, use the next */ - useidx = (useidx + 1) % uctxt->tidmapcnt; - idx++; - bitidx = 0; - mutex_unlock(&uctxt->exp_lock); - continue; - } - ngroups = ((npages - mapped) / dd->rcv_entries.group_size) + - !!((npages - mapped) % dd->rcv_entries.group_size); - - /* - * If we've gotten here, the current set of groups does have - * one or more free groups. - */ - free = num_free_groups(uctxt->tidusemap[useidx], &bitidx); - if (!free) { - /* - * Despite the check above, free could still come back - * as 0 because we don't check the entire bitmap but - * we start from bitidx. - */ - mutex_unlock(&uctxt->exp_lock); - continue; - } - bits_used = min(free, ngroups); - tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx; - uctxt->tidusemap[useidx] |= tidmap[useidx]; - mutex_unlock(&uctxt->exp_lock); - - /* - * At this point, we know where in the map we have free bits. - * properly offset into the various "shadow" arrays and compute - * the RcvArray entry index. - */ - offset = ((useidx * BITS_PER_LONG) + bitidx) * - dd->rcv_entries.group_size; - pages = uctxt->tid_pg_list + offset; - phys = uctxt->physshadow + offset; - tid = uctxt->expected_base + offset; - - /* Calculate how many pages we can pin based on free bits */ - pinned = min((bits_used * dd->rcv_entries.group_size), - (npages - mapped)); - /* - * Now that we know how many free RcvArray entries we have, - * we can pin that many user pages. 
- */ - ret = hfi1_acquire_user_pages(vaddr + (mapped * PAGE_SIZE), - pinned, true, pages); - if (ret) { - /* - * We can't continue because the pages array won't be - * initialized. This should never happen, - * unless perhaps the user has mpin'ed the pages - * themselves. - */ - dd_dev_info(dd, - "Failed to lock addr %p, %u pages: errno %d\n", - (void *) vaddr, pinned, -ret); - /* - * Let go of the bits that we reserved since we are not - * going to use them. - */ - mutex_lock(&uctxt->exp_lock); - uctxt->tidusemap[useidx] &= - ~(((1ULL << bits_used) - 1) << bitidx); - mutex_unlock(&uctxt->exp_lock); - goto done; - } - /* - * How many groups do we need based on how many pages we have - * pinned? - */ - ngroups = (pinned / dd->rcv_entries.group_size) + - !!(pinned % dd->rcv_entries.group_size); - /* - * Keep programming RcvArray entries for all the free - * groups. - */ - for (i = 0, grp = 0; grp < ngroups; i++, grp++) { - unsigned j; - u32 pair_size = 0, tidsize; - /* - * This inner loop will program an entire group or the - * array of pinned pages (which ever limit is hit - * first). - */ - for (j = 0; j < dd->rcv_entries.group_size && - pmapped < pinned; j++, pmapped++, tid++) { - tidsize = PAGE_SIZE; - phys[pmapped] = hfi1_map_page(dd->pcidev, - pages[pmapped], 0, - tidsize, PCI_DMA_FROMDEVICE); - trace_hfi1_exp_rcv_set(uctxt->ctxt, - fd->subctxt, - tid, vaddr, - phys[pmapped], - pages[pmapped]); - /* - * Each RcvArray entry is programmed with one - * page * worth of memory. This will handle - * the 8K MTU as well as anything smaller - * due to the fact that both entries in the - * RcvTidPair are programmed with a page. - * PSM currently does not handle anything - * bigger than 8K MTU, so should we even worry - * about 10K here? - */ - hfi1_put_tid(dd, tid, PT_EXPECTED, - phys[pmapped], - ilog2(tidsize >> PAGE_SHIFT) + 1); - pair_size += tidsize >> PAGE_SHIFT; - EXP_TID_RESET(tidlist[pairidx], LEN, pair_size); - if (!(tid % 2)) { - tidlist[pairidx] |= - EXP_TID_SET(IDX, - (tid - uctxt->expected_base) - / 2); - tidlist[pairidx] |= - EXP_TID_SET(CTRL, 1); - tidcnt++; - } else { - tidlist[pairidx] |= - EXP_TID_SET(CTRL, 2); - pair_size = 0; - pairidx++; - } - } - /* - * We've programmed the entire group (or as much of the - * group as we'll use. Now, it's time to push it out... - */ - flush_wc(); - } - mapped += pinned; - atomic_set(&uctxt->tidcursor, - (((useidx & 0xffffff) << 16) | - ((bitidx + bits_used) & 0xffffff))); - } - trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, uctxt->tidusemap, - uctxt->tidmapcnt); - -done: - /* If we've mapped anything, copy relevant info to user */ - if (mapped) { - if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist, - tidlist, sizeof(tidlist[0]) * tidcnt)) { - ret = -EFAULT; - goto done; - } - /* copy TID info to user */ - if (copy_to_user((void __user *)(unsigned long)tinfo->tidmap, - tidmap, sizeof(tidmap[0]) * uctxt->tidmapcnt)) - ret = -EFAULT; - } -bail: - /* - * Calculate mapped length. New Exp TID protocol does not "unwind" and - * report an error if it can't map the entire buffer. It just reports - * the length that was mapped. 
- */ - tinfo->length = mapped * PAGE_SIZE; - tinfo->tidcnt = tidcnt; - return ret; -} - -static int exp_tid_free(struct file *fp, struct hfi1_tid_info *tinfo) -{ - struct hfi1_filedata *fd = fp->private_data; - struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_devdata *dd = uctxt->dd; - unsigned long tidmap[uctxt->tidmapcnt]; - struct page **pages; - dma_addr_t *phys; - u16 idx, bitidx, tid; - int ret = 0; - - if (copy_from_user(&tidmap, (void __user *)(unsigned long) - tinfo->tidmap, - sizeof(tidmap[0]) * uctxt->tidmapcnt)) { - ret = -EFAULT; - goto done; - } - for (idx = 0; idx < uctxt->tidmapcnt; idx++) { - unsigned long map; - - bitidx = 0; - if (!tidmap[idx]) - continue; - map = tidmap[idx]; - while ((bitidx = tzcnt(map)) < BITS_PER_LONG) { - int i, pcount = 0; - struct page *pshadow[dd->rcv_entries.group_size]; - unsigned offset = ((idx * BITS_PER_LONG) + bitidx) * - dd->rcv_entries.group_size; - - pages = uctxt->tid_pg_list + offset; - phys = uctxt->physshadow + offset; - tid = uctxt->expected_base + offset; - for (i = 0; i < dd->rcv_entries.group_size; - i++, tid++) { - if (pages[i]) { - hfi1_put_tid(dd, tid, PT_INVALID, - 0, 0); - trace_hfi1_exp_rcv_free(uctxt->ctxt, - fd->subctxt, - tid, phys[i], - pages[i]); - pci_unmap_page(dd->pcidev, phys[i], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - pshadow[pcount] = pages[i]; - pages[i] = NULL; - pcount++; - phys[i] = 0; - } - } - flush_wc(); - hfi1_release_user_pages(pshadow, pcount, true); - clear_bit(bitidx, &uctxt->tidusemap[idx]); - map &= ~(1ULL<ctxt, fd->subctxt, 1, uctxt->tidusemap, - uctxt->tidmapcnt); -done: - return ret; -} - -static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt) -{ - struct hfi1_devdata *dd = uctxt->dd; - unsigned tid; - - dd_dev_info(dd, "ctxt %u unlocking any locked expTID pages\n", - uctxt->ctxt); - for (tid = 0; tid < uctxt->expected_count; tid++) { - struct page *p = uctxt->tid_pg_list[tid]; - dma_addr_t phys; - - if (!p) - continue; - - phys = uctxt->physshadow[tid]; - uctxt->physshadow[tid] = 0; - uctxt->tid_pg_list[tid] = NULL; - pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE); - hfi1_release_user_pages(&p, 1, true); - } -} - static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt, u16 pkey) { diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 53f464c..62157cc 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -240,18 +240,6 @@ struct hfi1_ctxtdata { u32 expected_count; /* index of first expected TID entry. 
*/ u32 expected_base; - /* cursor into the exp group sets */ - atomic_t tidcursor; - /* number of exp TID groups assigned to the ctxt */ - u16 numtidgroups; - /* size of exp TID group fields in tidusemap */ - u16 tidmapcnt; - /* exp TID group usage bitfield array */ - unsigned long *tidusemap; - /* pinned pages for exp sends, allocated at open */ - struct page **tid_pg_list; - /* dma handles for exp tid pages */ - dma_addr_t *physshadow; struct exp_tid_set tid_group_list; struct exp_tid_set tid_used_list; @@ -1660,8 +1648,6 @@ int get_platform_config_field(struct hfi1_devdata *dd, enum platform_config_table_type_encoding table_type, int table_index, int field_index, u32 *data, u32 len); -dma_addr_t hfi1_map_page(struct pci_dev *, struct page *, unsigned long, - size_t, int); const char *get_unit_name(int unit); /* diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 72c5143..00f52e8 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -962,13 +962,10 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd->egrbufs.buffers); sc_free(rcd->sc); - vfree(rcd->physshadow); - vfree(rcd->tid_pg_list); vfree(rcd->user_event_mask); vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); - kfree(rcd->tidusemap); kfree(rcd->opstats); kfree(rcd); } diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index 86c12eb..1e43567 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -153,92 +153,130 @@ TRACE_EVENT(hfi1_receive_interrupt, ) ); -const char *print_u64_array(struct trace_seq *, u64 *, int); +TRACE_EVENT(hfi1_exp_tid_reg, + TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, + u32 npages, unsigned long va, unsigned long pa, + dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), + TP_STRUCT__entry( + __field(unsigned, ctxt) + __field(u16, subctxt) + __field(u32, rarr) + __field(u32, npages) + __field(unsigned long, va) + __field(unsigned long, pa) + __field(dma_addr_t, dma) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->rarr = rarr; + __entry->npages = npages; + __entry->va = va; + __entry->pa = pa; + __entry->dma = dma; + ), + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", + __entry->ctxt, + __entry->subctxt, + __entry->rarr, + __entry->npages, + __entry->pa, + __entry->va, + __entry->dma + ) + ); -TRACE_EVENT(hfi1_exp_tid_map, - TP_PROTO(unsigned ctxt, u16 subctxt, int dir, - unsigned long *maps, u16 count), - TP_ARGS(ctxt, subctxt, dir, maps, count), +TRACE_EVENT(hfi1_exp_tid_unreg, + TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), TP_STRUCT__entry( __field(unsigned, ctxt) __field(u16, subctxt) - __field(int, dir) - __field(u16, count) - __dynamic_array(unsigned long, maps, sizeof(*maps) * count) + __field(u32, rarr) + __field(u32, npages) + __field(unsigned long, va) + __field(unsigned long, pa) + __field(dma_addr_t, dma) ), TP_fast_assign( __entry->ctxt = ctxt; __entry->subctxt = subctxt; - __entry->dir = dir; - __entry->count = count; - memcpy(__get_dynamic_array(maps), maps, - sizeof(*maps) * count); + __entry->rarr = rarr; + __entry->npages = npages; + __entry->va = va; + __entry->pa = pa; + __entry->dma = dma; ), - TP_printk("[%3u:%02u] %s tidmaps %s", + TP_printk("[%u:%u] entry:%u, %u pages @ 
0x%lx, va:0x%lx dma:0x%llx", __entry->ctxt, __entry->subctxt, - (__entry->dir ? ">" : "<"), - print_u64_array(p, __get_dynamic_array(maps), - __entry->count) + __entry->rarr, + __entry->npages, + __entry->pa, + __entry->va, + __entry->dma ) ); -TRACE_EVENT(hfi1_exp_rcv_set, - TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid, - unsigned long vaddr, u64 phys_addr, void *page), - TP_ARGS(ctxt, subctxt, tid, vaddr, phys_addr, page), +TRACE_EVENT(hfi1_exp_tid_inval, + TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr, + u32 npages, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, va, rarr, npages, dma), TP_STRUCT__entry( __field(unsigned, ctxt) __field(u16, subctxt) - __field(u32, tid) - __field(unsigned long, vaddr) - __field(u64, phys_addr) - __field(void *, page) + __field(unsigned long, va) + __field(u32, rarr) + __field(u32, npages) + __field(dma_addr_t, dma) ), TP_fast_assign( __entry->ctxt = ctxt; __entry->subctxt = subctxt; - __entry->tid = tid; - __entry->vaddr = vaddr; - __entry->phys_addr = phys_addr; - __entry->page = page; + __entry->va = va; + __entry->rarr = rarr; + __entry->npages = npages; + __entry->dma = dma; ), - TP_printk("[%u:%u] TID %u, vaddrs 0x%lx, physaddr 0x%llx, pgp %p", + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx", __entry->ctxt, __entry->subctxt, - __entry->tid, - __entry->vaddr, - __entry->phys_addr, - __entry->page + __entry->rarr, + __entry->npages, + __entry->va, + __entry->dma ) ); -TRACE_EVENT(hfi1_exp_rcv_free, - TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid, - unsigned long phys, void *page), - TP_ARGS(ctxt, subctxt, tid, phys, page), +TRACE_EVENT(hfi1_mmu_invalidate, + TP_PROTO(unsigned ctxt, u16 subctxt, const char *type, + unsigned long start, unsigned long end), + TP_ARGS(ctxt, subctxt, type, start, end), TP_STRUCT__entry( __field(unsigned, ctxt) __field(u16, subctxt) - __field(u32, tid) - __field(unsigned long, phys) - __field(void *, page) + __string(type, type) + __field(unsigned long, start) + __field(unsigned long, end) ), TP_fast_assign( __entry->ctxt = ctxt; __entry->subctxt = subctxt; - __entry->tid = tid; - __entry->phys = phys; - __entry->page = page; + __assign_str(type, type); + __entry->start = start; + __entry->end = end; ), - TP_printk("[%u:%u] freeing TID %u, 0x%lx, pgp %p", + TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx", __entry->ctxt, __entry->subctxt, - __entry->tid, - __entry->phys, - __entry->page + __get_str(type), + __entry->start, + __entry->end ) ); + #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_tx diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index d33f579..79612a2 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -902,6 +902,8 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, return -EFAULT; } hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); + trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, + npages, node->virt, node->phys, phys); return 0; } @@ -947,6 +949,10 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; + trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, + node->npages, node->virt, node->phys, + node->dma_addr); + hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0); /* * Make sure device has seen the write before we unpin the @@ -1023,6 +1029,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, struct 
mmu_rb_node *node; unsigned long addr = start; + trace_hfi1_mmu_invalidate(uctxt->ctxt, fd->subctxt, mmu_types[type], + start, end); + spin_lock(&fd->rb_lock); while (addr < end) { node = mmu_rb_search_by_addr(root, addr); @@ -1049,6 +1058,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, if (node->freed) continue; + trace_hfi1_exp_tid_inval(uctxt->ctxt, fd->subctxt, node->virt, + node->rcventry, node->npages, + node->dma_addr); node->freed = true; spin_lock(&fd->invalid_lock); diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c index 692de65..1854c0c 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/staging/rdma/hfi1/user_pages.c @@ -54,20 +54,6 @@ #include "hfi.h" -/** - * hfi1_map_page - a safety wrapper around pci_map_page() - * - */ -dma_addr_t hfi1_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) -{ - dma_addr_t phys; - - phys = pci_map_page(hwdev, page, offset, size, direction); - - return phys; -} - int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, struct page **pages) { diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 92be2e37..a533cec 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -66,7 +66,7 @@ * The major version changes when data structures change in an incompatible * way. The driver must be the same for initialization to succeed. */ -#define HFI1_USER_SWMAJOR 4 +#define HFI1_USER_SWMAJOR 5 /* * Minor version differences are always compatible @@ -241,11 +241,6 @@ struct hfi1_tid_info { __u32 tidcnt; /* length of transfer buffer programmed by this request */ __u32 length; - /* - * pointer to bitmap of TIDs used for this call; - * checked for being large enough at open - */ - __u64 tidmap; }; struct hfi1_cmd { -- cgit v0.10.2 From 571e09eeff544e5562bd2a704f1fe91083f7592f Mon Sep 17 00:00:00 2001 From: Abhilash Jindal Date: Sun, 31 Jan 2016 13:53:31 -0500 Subject: IB/mlx4: Use boottime Wall time obtained from ktime_get_real_ns is susceptible to sudden jumps due to user setting the time or due to NTP. Boot time is constantly increasing time better suited for comparing two timestamps. Signed-off-by: Abhilash Jindal Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 21cb41a..c74ef26 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -310,7 +310,7 @@ static void aliasguid_query_handler(int status, if (status) { pr_debug("(port: %d) failed: status = %d\n", cb_ctx->port, status); - rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC; + rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC; goto out; } @@ -416,7 +416,7 @@ next_entry: be64_to_cpu((__force __be64)rec->guid_indexes), be64_to_cpu((__force __be64)applied_guid_indexes), be64_to_cpu((__force __be64)declined_guid_indexes)); - rec->time_to_run = ktime_get_real_ns() + + rec->time_to_run = ktime_get_boot_ns() + resched_delay_sec * NSEC_PER_SEC; } else { rec->status = MLX4_GUID_INFO_STATUS_SET; @@ -708,7 +708,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port, } } if (resched_delay_sec) { - u64 curr_time = ktime_get_real_ns(); + u64 curr_time = ktime_get_boot_ns(); *resched_delay_sec = (low_record_time < curr_time) ? 
0 : div_u64((low_record_time - curr_time), NSEC_PER_SEC); -- cgit v0.10.2 From a3100a78794175d7f2488a3155d247da3d7390e4 Mon Sep 17 00:00:00 2001 From: Marina Varshaver Date: Thu, 18 Feb 2016 18:31:05 +0200 Subject: IB/core: Add don't trap flag to flow creation Don't trap flag (i.e. IB_FLOW_ATTR_FLAGS_DONT_TRAP) indicates that QP will receive traffic, but will not steal it. When a packet matches a flow steering rule that was created with the don't trap flag, the QPs assigned to this rule will get this packet, but matching will continue to other equal/lower priority rules. This will let other QPs assigned to those rules to get the packet too. If both don't trap rule and other rules have the same priority and match the same packet, the behavior is undefined. The don't trap flag can't be set with default rule types (i.e. IB_FLOW_ATTR_ALL_DEFAULT, IB_FLOW_ATTR_MC_DEFAULT) as default rules don't have rules after them and don't trap has no meaning here. Signed-off-by: Marina Varshaver Reviewed-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6ffc9c4..0f05de6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3085,6 +3085,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) return -EPERM; + if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED) + return -EINVAL; + + if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && + ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) || + (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT))) + return -EINVAL; + if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1c7ab6c..41f2c25 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1653,6 +1653,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) + return ERR_PTR(-EOPNOTSUPP); + memset(type, 0, sizeof(type)); mflow = kzalloc(sizeof(*mflow), GFP_KERNEL); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c..514223f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1487,6 +1487,11 @@ enum ib_flow_domain { IB_FLOW_DOMAIN_NUM /* Must be last */ }; +enum ib_flow_flags { + IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ + IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */ +}; + struct ib_flow_eth_filter { u8 dst_mac[6]; u8 src_mac[6]; -- cgit v0.10.2 From 0e451e883bd13ce616f439e2414b8c17fa28318a Mon Sep 17 00:00:00 2001 From: Marina Varshaver Date: Thu, 18 Feb 2016 18:31:06 +0200 Subject: IB/mlx4: Add support for the don't trap rule Add support for receiving multicast/unicast traffic with the don't trap rule. Sniffing these packets requires a flow steering rule of type NORMAL at priority 0 with flag IB_FLOW_ATTR_FLAGS_DONT_TRAP set. Choosing between multicast or unicast is done via ethernet L2 dest_mac mask and value: - If mask is all zeros - unicast and multicast are set. - If mask non zero - only mask with multicast bit 1 and rest 0 is supported, the mac value will choose if it is multicast or unicast rule. 
If the mask multicast bit is on and some other bits are on too, it means a request for specific multicast or unicast, this is not supported, either receive all multicast or all unicast. Only when limitations are met registered QP will receive requested type but other QPs can receive same traffic if registered for it. Otherwise, if limitations are not met, an error will be returned. Limitations: - Rule must be with priority 0. - A0 mode is not supported. - Sniffer QP cannot appear in any other flow steering rule. Signed-off-by: Marina Varshaver Reviewed-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 41f2c25..914bc98 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1643,6 +1643,56 @@ static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_ return err; } +static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev, + struct ib_flow_attr *flow_attr, + enum mlx4_net_trans_promisc_mode *type) +{ + int err = 0; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) || + (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) || + (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) { + return -EOPNOTSUPP; + } + + if (flow_attr->num_of_specs == 0) { + type[0] = MLX4_FS_MC_SNIFFER; + type[1] = MLX4_FS_UC_SNIFFER; + } else { + union ib_flow_spec *ib_spec; + + ib_spec = (union ib_flow_spec *)(flow_attr + 1); + if (ib_spec->type != IB_FLOW_SPEC_ETH) + return -EINVAL; + + /* if all is zero than MC and UC */ + if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) { + type[0] = MLX4_FS_MC_SNIFFER; + type[1] = MLX4_FS_UC_SNIFFER; + } else { + u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01, + ib_spec->eth.mask.dst_mac[1], + ib_spec->eth.mask.dst_mac[2], + ib_spec->eth.mask.dst_mac[3], + ib_spec->eth.mask.dst_mac[4], + ib_spec->eth.mask.dst_mac[5]}; + + /* Above xor was only on MC bit, non empty mask is valid + * only if this bit is set and rest are zero. 
+ */ + if (!is_zero_ether_addr(&mac[0])) + return -EINVAL; + + if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac)) + type[0] = MLX4_FS_MC_SNIFFER; + else + type[0] = MLX4_FS_UC_SNIFFER; + } + } + + return err; +} + static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) @@ -1653,7 +1703,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); - if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) + if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && + (flow_attr->type != IB_FLOW_ATTR_NORMAL)) return ERR_PTR(-EOPNOTSUPP); memset(type, 0, sizeof(type)); @@ -1666,7 +1717,19 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, switch (flow_attr->type) { case IB_FLOW_ATTR_NORMAL: - type[0] = MLX4_FS_REGULAR; + /* If dont trap flag (continue match) is set, under specific + * condition traffic be replicated to given qp, + * without stealing it + */ + if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) { + err = mlx4_ib_add_dont_trap_rule(dev, + flow_attr, + type); + if (err) + goto err_free; + } else { + type[0] = MLX4_FS_REGULAR; + } break; case IB_FLOW_ATTR_ALL_DEFAULT: @@ -1678,8 +1741,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, break; case IB_FLOW_ATTR_SNIFFER: - type[0] = MLX4_FS_UC_SNIFFER; - type[1] = MLX4_FS_MC_SNIFFER; + type[0] = MLX4_FS_MIRROR_RX_PORT; + type[1] = MLX4_FS_MIRROR_SX_PORT; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d66c690..e970945 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -157,7 +157,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [29] = "802.1ad offload support", [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", - [33] = "RoCEv2 support" + [33] = "RoCEv2 support", + [34] = "DMFS Sniffer support (UC & MC)" }; int i; @@ -810,6 +811,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + if (field & 0x20) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER; MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1d4e2e0..42d8de8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -752,8 +752,10 @@ static const u8 __promisc_mode[] = { [MLX4_FS_REGULAR] = 0x0, [MLX4_FS_ALL_DEFAULT] = 0x1, [MLX4_FS_MC_DEFAULT] = 0x3, - [MLX4_FS_UC_SNIFFER] = 0x4, - [MLX4_FS_MC_SNIFFER] = 0x5, + [MLX4_FS_MIRROR_RX_PORT] = 0x4, + [MLX4_FS_MIRROR_SX_PORT] = 0x5, + [MLX4_FS_UC_SNIFFER] = 0x6, + [MLX4_FS_MC_SNIFFER] = 0x7, }; int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a0e8cc8..8541a91 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -219,6 +219,7 @@ enum { MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, + MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, }; enum { @@ -1160,6 +1161,8 @@ enum 
mlx4_net_trans_promisc_mode { MLX4_FS_REGULAR = 1, MLX4_FS_ALL_DEFAULT, MLX4_FS_MC_DEFAULT, + MLX4_FS_MIRROR_RX_PORT, + MLX4_FS_MIRROR_SX_PORT, MLX4_FS_UC_SNIFFER, MLX4_FS_MC_SNIFFER, MLX4_FS_MODE_NUM, /* should be last */ -- cgit v0.10.2 From bfec53c6c88aa2b18a5015ca3e0137d4e0dec95a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 15 Feb 2016 21:25:44 +0000 Subject: RDMA/nes: Replace LRO with GRO GRO is simpler to use than the old inet_lro library, and is compatible with forwarding and bridging configurations. Compile-tested only. Signed-off-by: Ben Hutchings Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig index 846dc97..7964eba8 100644 --- a/drivers/infiniband/hw/nes/Kconfig +++ b/drivers/infiniband/hw/nes/Kconfig @@ -2,7 +2,6 @@ config INFINIBAND_NES tristate "NetEffect RNIC Driver" depends on PCI && INET && INFINIBAND select LIBCRC32C - select INET_LRO ---help--- This is the RDMA Network Interface Card (RNIC) driver for NetEffect Ethernet Cluster Server Adapters. diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 4713dd7..a1c6481 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -35,18 +35,11 @@ #include #include #include -#include -#include #include -#include #include #include "nes.h" -static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; -module_param(nes_lro_max_aggr, uint, 0444); -MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); - static int wide_ppm_offset; module_param(wide_ppm_offset, int, 0644); MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm"); @@ -1642,25 +1635,6 @@ static void nes_rq_wqes_timeout(unsigned long parm) } -static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr, - void **tcph, u64 *hdr_flags, void *priv) -{ - unsigned int ip_len; - struct iphdr *iph; - skb_reset_network_header(skb); - iph = ip_hdr(skb); - if (iph->protocol != IPPROTO_TCP) - return -1; - ip_len = ip_hdrlen(skb); - skb_set_transport_header(skb, ip_len); - *tcph = tcp_hdr(skb); - - *hdr_flags = LRO_IPV4 | LRO_TCP; - *iphdr = iph; - return 0; -} - - /** * nes_init_nic_qp */ @@ -1895,14 +1869,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) return -ENOMEM; } - nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; - nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; - nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; - nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; - nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; - nesvnic->lro_mgr.dev = netdev; - nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; - nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; return 0; } @@ -2809,13 +2775,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) u16 pkt_type; u16 rqes_processed = 0; u8 sq_cqes = 0; - u8 nes_use_lro = 0; head = cq->cq_head; cq_size = cq->cq_size; cq->cqes_pending = 1; - if (nesvnic->netdev->features & NETIF_F_LRO) - nes_use_lro = 1; do { if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & NES_NIC_CQE_VALID) { @@ -2950,10 +2913,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag); } - if (nes_use_lro) - lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); - else - netif_receive_skb(rx_skb); + napi_gro_receive(&nesvnic->napi, rx_skb); skip_rx_indicate0: ; @@ -2984,8 +2944,6 @@ 
skip_rx_indicate0: } while (1); - if (nes_use_lro) - lro_flush_all(&nesvnic->lro_mgr); if (sq_cqes) { barrier(); /* restart the queue if it had been stopped */ diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index c908020..1b66ef1 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -33,8 +33,6 @@ #ifndef __NES_HW_H #define __NES_HW_H -#include - #define NES_PHY_TYPE_CX4 1 #define NES_PHY_TYPE_1G 2 #define NES_PHY_TYPE_ARGUS 4 @@ -1049,8 +1047,6 @@ struct nes_hw_tune_timer { #define NES_TIMER_ENABLE_LIMIT 4 #define NES_MAX_LINK_INTERRUPTS 128 #define NES_MAX_LINK_CHECK 200 -#define NES_MAX_LRO_DESCRIPTORS 32 -#define NES_LRO_MAX_AGGR 64 struct nes_adapter { u64 fw_ver; @@ -1263,9 +1259,6 @@ struct nes_vnic { u8 next_qp_nic_index; u8 of_device_registered; u8 rdma_enabled; - u32 lro_max_aggr; - struct net_lro_mgr lro_mgr; - struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS]; struct timer_list event_timer; enum ib_event_type delayed_event; enum ib_event_type last_dispatched_event; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 6a0bdfa..3ea9e05 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1085,9 +1085,6 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "Free 4Kpbls", "Free 256pbls", "Timer Inits", - "LRO aggregated", - "LRO flushed", - "LRO no_desc", "PAU CreateQPs", "PAU DestroyQPs", }; @@ -1302,9 +1299,6 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, target_stat_values[++index] = nesadapter->free_4kpbl; target_stat_values[++index] = nesadapter->free_256pbl; target_stat_values[++index] = int_mod_timer_init; - target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated; - target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed; - target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc; target_stat_values[++index] = atomic_read(&pau_qps_created); target_stat_values[++index] = atomic_read(&pau_qps_destroyed); } @@ -1709,7 +1703,6 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->hw_features |= NETIF_F_TSO; netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX; - netdev->hw_features |= NETIF_F_LRO; nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," " nic_index = %d, logical_port = %d, mac_index = %d.\n", -- cgit v0.10.2 From a82268b30a8b4b920d0bad24472cbb000c8e734a Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Wed, 17 Feb 2016 13:06:33 -0500 Subject: nes: handling failed allocation when creating workqueue Since create_singlethread_workqueue uses kzalloc internally, it can fail when the system is under memory pressure, so need to handle it. 
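For illustration only (not part of the patch below): the fix follows the usual check-and-unwind pattern when chaining two workqueue allocations. A minimal sketch, with hypothetical names (my_core, "my_ewq", "my_dwq"):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_core {
	struct workqueue_struct *event_wq;
	struct workqueue_struct *disconn_wq;
};

static struct my_core *my_core_alloc(void)
{
	struct my_core *core = kzalloc(sizeof(*core), GFP_KERNEL);

	if (!core)
		return NULL;

	core->event_wq = create_singlethread_workqueue("my_ewq");
	if (!core->event_wq)
		goto out_free_core;

	core->disconn_wq = create_singlethread_workqueue("my_dwq");
	if (!core->disconn_wq)
		goto out_free_event_wq;

	return core;

out_free_event_wq:
	/* unwind in reverse order of allocation */
	destroy_workqueue(core->event_wq);
out_free_core:
	kfree(core);
	return NULL;
}
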
Signed-off-by: Insu Yun Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index cb9f0f2..e3fc2a1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2856,12 +2856,22 @@ static struct nes_cm_core *nes_cm_alloc_core(void) nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n"); cm_core->event_wq = create_singlethread_workqueue("nesewq"); + if (!cm_core->event_wq) + goto out_free_cmcore; cm_core->post_event = nes_cm_post_event; nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n"); cm_core->disconn_wq = create_singlethread_workqueue("nesdwq"); + if (!cm_core->disconn_wq) + goto out_free_wq; print_core(cm_core); return cm_core; + +out_free_wq: + destroy_workqueue(cm_core->event_wq); +out_free_cmcore: + kfree(cm_core); + return NULL; } -- cgit v0.10.2 From e1614869d370d4d1599d771346d7da570f1d2bfa Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 28 Jan 2016 08:59:56 -0500 Subject: RDMA/ocrdma: Export udp encapsulation capability Add support to read device configuration and initialize port-immutables to report UDP-Encap flag during port query. Signed-off-by: Devesh Sharma Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 12503f1..b58833d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -114,6 +114,7 @@ struct ocrdma_dev_attr { u8 local_ca_ack_delay; u8 ird; u8 num_ird_pages; + u8 udp_encap; }; struct ocrdma_dma_mem { @@ -598,4 +599,10 @@ static inline u8 ocrdma_get_ae_link_state(u32 ae_state) return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT); } +static inline bool ocrdma_is_udp_encap_supported(struct ocrdma_dev *dev) +{ + return (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV4) || + (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV6); +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 283ca84..aea7f17 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1144,6 +1144,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_pd = (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT; + attr->udp_encap = (rsp->max_pd_ca_ack_delay & + OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK) >> + OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT; attr->max_dpp_pds = (rsp->max_dpp_pds_credits & OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index f387430..3d75f65 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -89,8 +89,10 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; + struct ocrdma_dev *dev; int err; + dev = get_ocrdma_dev(ibdev); err = ocrdma_query_port(ibdev, port_num, &attr); if (err) return err; @@ -98,6 +100,8 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (ocrdma_is_udp_encap_supported(dev)) + immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; diff --git 
a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 99dd6fd..8d75bd4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -140,7 +140,11 @@ enum { OCRDMA_DB_RQ_SHIFT = 24 }; -#define OCRDMA_ROUDP_FLAGS_SHIFT 0x03 +enum { + OCRDMA_L3_TYPE_IB_GRH = 0x00, + OCRDMA_L3_TYPE_IPV4 = 0x01, + OCRDMA_L3_TYPE_IPV6 = 0x02 +}; #define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ #define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ @@ -546,7 +550,8 @@ enum { OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8, OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF << OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT, - + OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT = 3, + OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, -- cgit v0.10.2 From 6b0626679d81626eac47cba7940f1435ec480b2e Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 28 Jan 2016 08:59:57 -0500 Subject: RDMA/ocrdma: Support RoCE-v2 in the UD path This patch adds following changes to support RoCE-v2 in the UD path. * During AH creation GID-type is resolved for a given gid-index. * Based on GID-type protocol header is built. * Work completion reports network header type and set IB_WC_WITH_NETWORK_HDR_TYPE flag in wc->wc_flags to indicate that the network header type is valid. Signed-off-by: Somnath Kotur Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index b58833d..45bdfa0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -357,6 +357,7 @@ struct ocrdma_ah { struct ocrdma_av *av; u16 sgid_index; u32 id; + u8 hdr_type; }; struct ocrdma_qp_hwq_info { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 3790771..4aed1db 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -55,6 +55,21 @@ #define OCRDMA_VID_PCP_SHIFT 0xD +static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type) +{ + switch (hdr_type) { + case OCRDMA_L3_TYPE_IB_GRH: + return (u16)0x8915; + case OCRDMA_L3_TYPE_IPV4: + return (u16)0x0800; + case OCRDMA_L3_TYPE_IPV6: + return (u16)0x86dd; + default: + pr_err("ocrdma%d: Invalid network header\n", devid); + return 0; + } +} + static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, union ib_gid *sgid, int pdid, bool *isvlan, u16 vlan_tag) @@ -63,10 +78,23 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ocrdma_eth_vlan eth; struct ocrdma_grh grh; int eth_sz; + u16 proto_num = 0; + u8 nxthdr = 0x11; + struct iphdr ipv4; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; memset(ð, 0, sizeof(eth)); memset(&grh, 0, sizeof(grh)); + /* Protocol Number */ + proto_num = ocrdma_hdr_type_to_proto_num(dev->id, ah->hdr_type); + if (!proto_num) + return -EINVAL; + nxthdr = (proto_num == 0x8915) ? 
0x1b : 0x11; /* VLAN */ if (!vlan_tag || (vlan_tag > 0xFFF)) vlan_tag = dev->pvid; @@ -78,13 +106,13 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, dev->id); } eth.eth_type = cpu_to_be16(0x8100); - eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); + eth.roce_eth_type = cpu_to_be16(proto_num); vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; eth.vlan_tag = cpu_to_be16(vlan_tag); eth_sz = sizeof(struct ocrdma_eth_vlan); *isvlan = true; } else { - eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); + eth.eth_type = cpu_to_be16(proto_num); eth_sz = sizeof(struct ocrdma_eth_basic); } /* MAC */ @@ -93,18 +121,33 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, if (status) return status; ah->sgid_index = attr->grh.sgid_index; - memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid)); - memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw)); - - grh.tclass_flow = cpu_to_be32((6 << 28) | - (attr->grh.traffic_class << 24) | - attr->grh.flow_label); - /* 0x1b is next header value in GRH */ - grh.pdid_hoplimit = cpu_to_be32((pdid << 16) | - (0x1b << 8) | attr->grh.hop_limit); /* Eth HDR */ memcpy(&ah->av->eth_hdr, ð, eth_sz); - memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); + if (ah->hdr_type == RDMA_NETWORK_IPV4) { + *((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) | + attr->grh.traffic_class); + ipv4.id = cpu_to_be16(pdid); + ipv4.frag_off = htons(IP_DF); + ipv4.tot_len = htons(0); + ipv4.ttl = attr->grh.hop_limit; + ipv4.protocol = nxthdr; + rdma_gid2ip(&sgid_addr._sockaddr, sgid); + ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr; + rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid); + ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr; + memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr)); + } else { + memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid)); + grh.tclass_flow = cpu_to_be32((6 << 28) | + (attr->grh.traffic_class << 24) | + attr->grh.flow_label); + memcpy(&grh.dgid[0], attr->grh.dgid.raw, + sizeof(attr->grh.dgid.raw)); + grh.pdid_hoplimit = cpu_to_be32((pdid << 16) | + (nxthdr << 8) | + attr->grh.hop_limit); + memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); + } if (*isvlan) ah->av->valid |= OCRDMA_AV_VLAN_VALID; ah->av->valid = cpu_to_le32(ah->av->valid); @@ -128,6 +171,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); @@ -148,6 +192,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); dev_put(sgid_attr.ndev); } + /* Get network header type for this GID */ + ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); if ((pd->uctx) && (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 8d75bd4..3d15948 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1740,8 +1740,11 @@ enum { /* w1 */ OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16, + OCRDMA_CQE_UD_XFER_LEN_MASK = 0x1FFF, OCRDMA_CQE_PKEY_SHIFT = 0, OCRDMA_CQE_PKEY_MASK = 0xFFFF, + OCRDMA_CQE_UD_L3TYPE_SHIFT = 29, + OCRDMA_CQE_UD_L3TYPE_MASK = 0x07, /* w2 */ OCRDMA_CQE_QPN_SHIFT = 0, @@ -1866,7 +1869,7 @@ struct ocrdma_ewqe_ud_hdr { u32 rsvd_dest_qpn; u32 qkey; u32 rsvd_ahid; - u32 rsvd; + u32 
hdr_type; }; /* extended wqe followed by hdr_wqe for Fast Memory register */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 12420e4..4df3f13 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2005,6 +2005,7 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, else ud_hdr->qkey = ud_wr(wr)->remote_qkey; ud_hdr->rsvd_ahid = ah->id; + ud_hdr->hdr_type = ah->hdr_type; if (ah->av->valid & OCRDMA_AV_VLAN_VALID) hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); } @@ -2717,9 +2718,11 @@ static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, return expand; } -static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe) +static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc, + struct ocrdma_cqe *cqe) { int status; + u16 hdr_type = 0; status = (le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; @@ -2728,7 +2731,17 @@ static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe) ibwc->pkey_index = 0; ibwc->wc_flags = IB_WC_GRH; ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >> - OCRDMA_CQE_UD_XFER_LEN_SHIFT); + OCRDMA_CQE_UD_XFER_LEN_SHIFT) & + OCRDMA_CQE_UD_XFER_LEN_MASK; + + if (ocrdma_is_udp_encap_supported(dev)) { + hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >> + OCRDMA_CQE_UD_L3TYPE_SHIFT) & + OCRDMA_CQE_UD_L3TYPE_MASK; + ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; + ibwc->network_hdr_type = hdr_type; + } + return status; } @@ -2791,12 +2804,15 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, struct ib_wc *ibwc) { + struct ocrdma_dev *dev; + + dev = get_ocrdma_dev(qp->ibqp.device); ibwc->opcode = IB_WC_RECV; ibwc->qp = &qp->ibqp; ibwc->status = IB_WC_SUCCESS; if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) - ocrdma_update_ud_rcqe(ibwc, cqe); + ocrdma_update_ud_rcqe(dev, ibwc, cqe); else ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen); -- cgit v0.10.2 From bcf117e2cf6f451b46780e0660e9ae7ab33a33ea Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 28 Jan 2016 08:59:58 -0500 Subject: RDMA/ocrdma: Support RoCE-v2 in the RC path This patch implements following changes to support RoCE-v2 in the RC path: * Get the GID-type for a given sgid. * Based on the GID-type get IPv4/IPv6 L3-address and give those to underlying device. * Resolve and provide network header type to device. 
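As an illustrative sketch only (not part of the patch; it reuses the ib_gid_to_network_type() and rdma_gid2ip() core helpers that the diff below relies on, and example_resolve_l3 is a hypothetical function), deriving the wire header type and, for RoCE v2 over IPv4, the L3 addresses from a GID pair looks roughly like this:

#include <rdma/ib_verbs.h>	/* ib_gid_to_network_type() */
#include <rdma/ib_addr.h>	/* rdma_gid2ip() */

static void example_resolve_l3(enum ib_gid_type gid_type,
			       union ib_gid *sgid, union ib_gid *dgid)
{
	union {
		struct sockaddr _sockaddr;
		struct sockaddr_in _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;
	enum rdma_network_type net;

	net = ib_gid_to_network_type(gid_type, sgid);
	if (net == RDMA_NETWORK_IPV4) {
		/* RoCE v2 over IPv4: both GIDs are IPv4-mapped addresses */
		rdma_gid2ip(&sgid_addr._sockaddr, sgid);
		rdma_gid2ip(&dgid_addr._sockaddr, dgid);
		/* program the .sin_addr values into the QP/AH context */
	} else {
		/* RDMA_NETWORK_IPV6 / RDMA_NETWORK_IB: keep the 16-byte GIDs */
	}
}
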
Signed-off-by: Somnath Kotur Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index aea7f17..2cfbf15 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2504,7 +2504,12 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, union ib_gid sgid, zgid; struct ib_gid_attr sgid_attr; u32 vlan_id = 0xFFFF; - u8 mac_addr[6]; + u8 mac_addr[6], hdr_type; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); if ((ah_attr->ah_flags & IB_AH_GRH) == 0) @@ -2519,6 +2524,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->params.hop_lmt_rq_psn |= (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT); cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; + + /* GIDs */ memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); @@ -2541,6 +2548,16 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, return status; cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | (mac_addr[2] << 16) | (mac_addr[3] << 24); + + hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + if (hdr_type == RDMA_NETWORK_IPV4) { + rdma_gid2ip(&sgid_addr._sockaddr, &sgid); + rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid); + memcpy(&cmd->params.dgid[0], + &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); + memcpy(&cmd->params.sgid[0], + &sgid_addr._sockaddr_in.sin_addr.s_addr, 4); + } /* convert them to LE format. */ ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); @@ -2561,7 +2578,9 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->params.rnt_rc_sl_fl |= (dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; } - + cmd->params.max_sge_recv_flags |= ((hdr_type << + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT) & + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK); return 0; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 3d15948..0efc966 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1112,6 +1112,8 @@ enum { OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7), OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8), OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9), + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT = 11, + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK = BIT(11) | BIT(12) | BIT(13), OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16, OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF << OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT, -- cgit v0.10.2 From 834d16d66ebc2b5faa06af0bda3bb6f9c71b3996 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 28 Jan 2016 08:59:59 -0500 Subject: RDMA/ocrdma: Support user AH creation for RoCE-v2 This patch adds support to create RoCE-v2 compatible AH. It uses ahid field to tell network-header-type to user space library. The library has to decode network-header-type from ahid field. 
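For illustration only (not part of the patch), a user-space provider library could unpack the 32-bit ahid word roughly as follows, using the bit layout from ocrdma_ah.h in this series; the function name is hypothetical:

#include <stdint.h>

/* Bit layout written by the kernel (see ocrdma_ah.h below) */
#define OCRDMA_AH_ID_MASK		0x3FF	/* bits 0..9   */
#define OCRDMA_AH_L3_TYPE_SHIFT		0x1D	/* bits 29..30 */
#define OCRDMA_AH_L3_TYPE_MASK		0x03
#define OCRDMA_AH_VLAN_VALID_SHIFT	0x1F	/* bit 31      */
#define OCRDMA_AH_VLAN_VALID_MASK	0x01

static void example_decode_ahid(uint32_t ahid)
{
	uint32_t ah_id = ahid & OCRDMA_AH_ID_MASK;
	uint32_t l3_type = (ahid >> OCRDMA_AH_L3_TYPE_SHIFT) &
			   OCRDMA_AH_L3_TYPE_MASK;
	uint32_t vlan_valid = (ahid >> OCRDMA_AH_VLAN_VALID_SHIFT) &
			      OCRDMA_AH_VLAN_VALID_MASK;

	/* l3_type matches OCRDMA_L3_TYPE_*: 0 = IB GRH, 1 = IPv4, 2 = IPv6 */
	(void)ah_id;
	(void)l3_type;
	(void)vlan_valid;
}
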
Signed-off-by: Somnath Kotur Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 4aed1db..e3c4f17 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -218,6 +218,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) ahid_addr = pd->uctx->ah_tbl.va + attr->dlid; *ahid_addr = 0; *ahid_addr |= ah->id & OCRDMA_AH_ID_MASK; + if (ocrdma_is_udp_encap_supported(dev)) { + *ahid_addr |= ((u32)ah->hdr_type & + OCRDMA_AH_L3_TYPE_MASK) << + OCRDMA_AH_L3_TYPE_SHIFT; + } if (isvlan) *ahid_addr |= (OCRDMA_AH_VLAN_VALID_MASK << OCRDMA_AH_VLAN_VALID_SHIFT); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 04a30ae..3856dd4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -46,9 +46,10 @@ enum { OCRDMA_AH_ID_MASK = 0x3FF, OCRDMA_AH_VLAN_VALID_MASK = 0x01, - OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F + OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F, + OCRDMA_AH_L3_TYPE_MASK = 0x03, + OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; - struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); int ocrdma_destroy_ah(struct ib_ah *); int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); -- cgit v0.10.2 From 51093254bf879bc9ce96590400a87897c7498463 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:03:09 -0800 Subject: IB/srpt: Simplify srpt_handle_tsk_mgmt() Let the target core check task existence instead of the SRP target driver. Additionally, let the target core check the validity of the task management request instead of the ib_srpt driver. This patch fixes the following kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 IP: [] srpt_handle_new_iu+0x6d7/0x790 [ib_srpt] Oops: 0002 [#1] SMP Call Trace: [] srpt_process_completion+0xde/0x570 [ib_srpt] [] srpt_compl_thread+0x13f/0x160 [ib_srpt] [] kthread+0xcf/0xe0 [] ret_from_fork+0x7c/0xb0 Signed-off-by: Bart Van Assche Fixes: 3e4f574857ee ("ib_srpt: Convert TMR path to target_submit_tmr") Tested-by: Alex Estrin Reviewed-by: Christoph Hellwig Cc: Nicholas Bellinger Cc: Sagi Grimberg Cc: stable Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0c37fee..4328679 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1670,47 +1670,6 @@ send_sense: return -1; } -/** - * srpt_rx_mgmt_fn_tag() - Process a task management function by tag. - * @ch: RDMA channel of the task management request. - * @fn: Task management function to perform. - * @req_tag: Tag of the SRP task management request. - * @mgmt_ioctx: I/O context of the task management request. - * - * Returns zero if the target core will process the task management - * request asynchronously. - * - * Note: It is assumed that the initiator serializes tag-based task management - * requests. 
- */ -static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) -{ - struct srpt_device *sdev; - struct srpt_rdma_ch *ch; - struct srpt_send_ioctx *target; - int ret, i; - - ret = -EINVAL; - ch = ioctx->ch; - BUG_ON(!ch); - BUG_ON(!ch->sport); - sdev = ch->sport->sdev; - BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - for (i = 0; i < ch->rq_size; ++i) { - target = ch->ioctx_ring[i]; - if (target->cmd.se_lun == ioctx->cmd.se_lun && - target->cmd.tag == tag && - srpt_get_cmd_state(target) != SRPT_STATE_DONE) { - ret = 0; - /* now let the target core abort &target->cmd; */ - break; - } - } - spin_unlock_irq(&sdev->spinlock); - return ret; -} - static int srp_tmr_to_tcm(int fn) { switch (fn) { @@ -1745,7 +1704,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, struct se_cmd *cmd; struct se_session *sess = ch->sess; uint64_t unpacked_lun; - uint32_t tag = 0; int tcm_tmr; int rc; @@ -1761,25 +1719,10 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); send_ioctx->cmd.tag = srp_tsk->tag; tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); - if (tcm_tmr < 0) { - send_ioctx->cmd.se_tmr_req->response = - TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; - goto fail; - } unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun, sizeof(srp_tsk->lun)); - - if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) { - rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag); - if (rc < 0) { - send_ioctx->cmd.se_tmr_req->response = - TMR_TASK_DOES_NOT_EXIST; - goto fail; - } - tag = srp_tsk->task_tag; - } rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun, - srp_tsk, tcm_tmr, GFP_KERNEL, tag, + srp_tsk, tcm_tmr, GFP_KERNEL, srp_tsk->task_tag, TARGET_SCF_ACK_KREF); if (rc != 0) { send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; -- cgit v0.10.2 From 9d2aa2b4fd14d3d9a0c3f0c43a70c7805d97a56d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:03:31 -0800 Subject: IB/srpt: Add parentheses around sizeof argument Although sizeof is an operator and hence in many cases parentheses can be left out, the recommended kernel coding style is to surround the sizeof argument with parentheses. This patch does not change any functionality. 
It has been generated by running the following shell command: sed -i 's/sizeof \([^ );,]*\)/sizeof(\1)/g' drivers/infiniband/ulp/srpt/*.[ch] Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 4328679..4ee9678 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -281,7 +281,7 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad) struct ib_class_port_info *cif; cif = (struct ib_class_port_info *)mad->data; - memset(cif, 0, sizeof *cif); + memset(cif, 0, sizeof(*cif)); cif->base_version = 1; cif->class_version = 1; cif->resp_time_value = 20; @@ -340,7 +340,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, return; } - memset(iocp, 0, sizeof *iocp); + memset(iocp, 0, sizeof(*iocp)); strcpy(iocp->id_string, SRPT_ID_STRING); iocp->guid = cpu_to_be64(srpt_service_guid); iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id); @@ -390,7 +390,7 @@ static void srpt_get_svc_entries(u64 ioc_guid, } svc_entries = (struct ib_dm_svc_entries *)mad->data; - memset(svc_entries, 0, sizeof *svc_entries); + memset(svc_entries, 0, sizeof(*svc_entries)); svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid); snprintf(svc_entries->service_entries[0].name, sizeof(svc_entries->service_entries[0].name), @@ -484,7 +484,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, rsp->ah = ah; dm_mad = rsp->mad; - memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad); + memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof(*dm_mad)); dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; dm_mad->mad_hdr.status = 0; @@ -532,7 +532,7 @@ static int srpt_refresh_port(struct srpt_port *sport) struct ib_port_attr port_attr; int ret; - memset(&port_modify, 0, sizeof port_modify); + memset(&port_modify, 0, sizeof(port_modify)); port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; port_modify.clr_port_cap_mask = 0; @@ -553,7 +553,7 @@ static int srpt_refresh_port(struct srpt_port *sport) goto err_query_port; if (!sport->mad_agent) { - memset(®_req, 0, sizeof reg_req); + memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION; set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); @@ -903,14 +903,14 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, db = (struct srp_direct_buf *)(srp_cmd->add_data + add_cdb_offset); - memcpy(ioctx->rbufs, db, sizeof *db); + memcpy(ioctx->rbufs, db, sizeof(*db)); *data_len = be32_to_cpu(db->len); } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { idb = (struct srp_indirect_buf *)(srp_cmd->add_data + add_cdb_offset); - ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db; + ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db); if (ioctx->n_rbuf > (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { @@ -929,7 +929,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, ioctx->rbufs = &ioctx->single_rbuf; else { ioctx->rbufs = - kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC); + kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC); if (!ioctx->rbufs) { ioctx->n_rbuf = 0; ret = -ENOMEM; @@ -938,7 +938,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, } db = idb->desc_list; - memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db); + memcpy(ioctx->rbufs, 
db, ioctx->n_rbuf * sizeof(*db)); *data_len = be32_to_cpu(idb->len); } out: @@ -956,7 +956,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) struct ib_qp_attr *attr; int ret; - attr = kzalloc(sizeof *attr, GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; @@ -1464,7 +1464,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, sense_data_len = ioctx->cmd.scsi_sense_length; WARN_ON(sense_data_len > sizeof(ioctx->sense_data)); - memset(srp_rsp, 0, sizeof *srp_rsp); + memset(srp_rsp, 0, sizeof(*srp_rsp)); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); @@ -1514,7 +1514,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, srp_rsp = ioctx->ioctx.buf; BUG_ON(!srp_rsp); - memset(srp_rsp, 0, sizeof *srp_rsp); + memset(srp_rsp, 0, sizeof(*srp_rsp)); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = @@ -1893,7 +1893,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) WARN_ON(ch->rq_size < 1); ret = -ENOMEM; - qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL); + qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL); if (!qp_init) goto out; @@ -2209,9 +2209,9 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]), be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8])); - rsp = kzalloc(sizeof *rsp, GFP_KERNEL); - rej = kzalloc(sizeof *rej, GFP_KERNEL); - rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL); + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); + rej = kzalloc(sizeof(*rej), GFP_KERNEL); + rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL); if (!rsp || !rej || !rep_param) { ret = -ENOMEM; @@ -2283,7 +2283,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto reject; } - ch = kzalloc(sizeof *ch, GFP_KERNEL); + ch = kzalloc(sizeof(*ch), GFP_KERNEL); if (!ch) { rej->reason = cpu_to_be32( SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); @@ -2396,7 +2396,7 @@ try_again: /* create cm reply */ rep_param->qp_num = ch->qp->qp_num; rep_param->private_data = (void *)rsp; - rep_param->private_data_len = sizeof *rsp; + rep_param->private_data_len = sizeof(*rsp); rep_param->rnr_retry_count = 7; rep_param->flow_control = 1; rep_param->failover_accepted = 0; @@ -2440,7 +2440,7 @@ reject: | SRP_BUF_FORMAT_INDIRECT); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, - (void *)rej, sizeof *rej); + (void *)rej, sizeof(*rej)); out: kfree(rep_param); @@ -2946,7 +2946,7 @@ static void srpt_add_one(struct ib_device *device) pr_debug("device = %p, device->dma_ops = %p\n", device, device->dma_ops); - sdev = kzalloc(sizeof *sdev, GFP_KERNEL); + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) goto err; -- cgit v0.10.2 From 697a35d709b828c406c1061b38251866a7871aa0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:04:02 -0800 Subject: IB/srpt: Remove struct srpt_node_acl Since struct srpt_node_acl is identical to struct se_node_acl, remove the definition of the former structure. This patch does not change any functionality. 
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Alex Estrin Cc: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 4ee9678..9f5cd3f 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3508,7 +3508,6 @@ static struct configfs_attribute *srpt_wwn_attrs[] = { static const struct target_core_fabric_ops srpt_template = { .module = THIS_MODULE, .name = "srpt", - .node_acl_size = sizeof(struct srpt_node_acl), .get_fabric_name = srpt_get_fabric_name, .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 09037f2b..b0ede97 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -363,12 +363,4 @@ struct srpt_device { struct list_head list; }; -/** - * struct srpt_node_acl - Per-initiator ACL data (managed via configfs). - * @nacl: Target core node ACL information. - */ -struct srpt_node_acl { - struct se_node_acl nacl; -}; - #endif /* IB_SRPT_H */ -- cgit v0.10.2 From f68cba4e9f7cdd3808631af286cc3244c5e57dab Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:04:20 -0800 Subject: IB/srpt: Inline srpt_sdev_name() srpt_sdev_name() is too trivial to keep it as a separate function. Hence inline this function. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9f5cd3f..babc15b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -109,16 +109,6 @@ enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir) } } -/** - * srpt_sdev_name() - Return the name associated with the HCA. - * - * Examples are ib0, ib1, ... - */ -static inline const char *srpt_sdev_name(struct srpt_device *sdev) -{ - return sdev->device->name; -} - static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch) { unsigned long flags; @@ -182,7 +172,7 @@ static void srpt_event_handler(struct ib_event_handler *handler, return; pr_debug("ASYNC event= %d on device= %s\n", event->event, - srpt_sdev_name(sdev)); + sdev->device->name); switch (event->event) { case IB_EVENT_PORT_ERR: @@ -3025,7 +3015,7 @@ static void srpt_add_one(struct ib_device *device) if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", - srpt_sdev_name(sdev), i); + sdev->device->name, i); goto err_ring; } snprintf(sport->port_guid, sizeof(sport->port_guid), -- cgit v0.10.2 From 33912d73487899bfbfc8edb5f3d586ba2b15ceb9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:04:43 -0800 Subject: IB/srpt: Inline srpt_get_ch_state() The callers of srpt_get_ch_state() can access ch->state safely without using locking. Hence inline this function. 
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index babc15b..6d1a137 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -109,17 +109,6 @@ enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir) } } -static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch) -{ - unsigned long flags; - enum rdma_ch_state state; - - spin_lock_irqsave(&ch->spinlock, flags); - state = ch->state; - spin_unlock_irqrestore(&ch->spinlock, flags); - return state; -} - static enum rdma_ch_state srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state) { @@ -216,7 +205,7 @@ static void srpt_srq_event(struct ib_event *event, void *ctx) static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) { pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n", - event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch)); + event->event, ch->cm_id, ch->sess_name, ch->state); switch (event->event) { case IB_EVENT_COMM_EST: @@ -228,7 +217,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) srpt_release_channel(ch); else pr_debug("%s: state %d - ignored LAST_WQE.\n", - ch->sess_name, srpt_get_ch_state(ch)); + ch->sess_name, ch->state); break; default: pr_err("received unrecognized IB QP event %d\n", event->event); @@ -1733,7 +1722,6 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *send_ioctx) { struct srp_cmd *srp_cmd; - enum rdma_ch_state ch_state; BUG_ON(!ch); BUG_ON(!recv_ioctx); @@ -1742,13 +1730,12 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, recv_ioctx->ioctx.dma, srp_max_req_size, DMA_FROM_DEVICE); - ch_state = srpt_get_ch_state(ch); - if (unlikely(ch_state == CH_CONNECTING)) { + if (unlikely(ch->state == CH_CONNECTING)) { list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); goto out; } - if (unlikely(ch_state != CH_LIVE)) + if (unlikely(ch->state != CH_LIVE)) goto out; srp_cmd = recv_ioctx->ioctx.buf; @@ -1857,7 +1844,7 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) out: while (!list_empty(&ch->cmd_wait_list) && - srpt_get_ch_state(ch) == CH_LIVE && + ch->state == CH_LIVE && (ioctx = srpt_get_send_ioctx(ch)) != NULL) { struct srpt_recv_ioctx *recv_ioctx; @@ -2238,17 +2225,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, && param->port == ch->sport->port && param->listen_id == ch->sport->sdev->cm_id && ch->cm_id) { - enum rdma_ch_state ch_state; - - ch_state = srpt_get_ch_state(ch); - if (ch_state != CH_CONNECTING - && ch_state != CH_LIVE) + if (ch->state != CH_CONNECTING + && ch->state != CH_LIVE) continue; /* found an existing channel */ pr_debug("Found existing channel %s" " cm_id= %p state= %d\n", - ch->sess_name, ch->cm_id, ch_state); + ch->sess_name, ch->cm_id, ch->state); __srpt_close_ch(ch); @@ -2499,7 +2483,7 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id) ch = srpt_find_channel(cm_id->context, cm_id); BUG_ON(!ch); - pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch)); + pr_debug("cm_id= %p ch->state= %d\n", cm_id, ch->state); spin_lock_irqsave(&ch->spinlock, flags); switch (ch->state) { @@ -2691,7 +2675,6 @@ static int srpt_write_pending(struct se_cmd *se_cmd) struct srpt_rdma_ch *ch; struct srpt_send_ioctx *ioctx; enum srpt_command_state new_state; - enum rdma_ch_state ch_state; int ret; ioctx = 
container_of(se_cmd, struct srpt_send_ioctx, cmd); @@ -2702,10 +2685,9 @@ static int srpt_write_pending(struct se_cmd *se_cmd) ch = ioctx->ch; BUG_ON(!ch); - ch_state = srpt_get_ch_state(ch); - switch (ch_state) { + switch (ch->state) { case CH_CONNECTING: - WARN(true, "unexpected channel state %d\n", ch_state); + WARN(true, "unexpected channel state %d\n", ch->state); ret = -EINVAL; goto out; case CH_LIVE: @@ -3171,7 +3153,7 @@ static void srpt_close_session(struct se_session *se_sess) ch = se_sess->fabric_sess_ptr; WARN_ON(ch->sess != se_sess); - pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch)); + pr_debug("ch %p state %d\n", ch, ch->state); sdev = ch->sport->sdev; spin_lock_irq(&sdev->spinlock); -- cgit v0.10.2 From 671ec1b2d3dc2c800fd3814fd41f4f96499c92ee Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:05:01 -0800 Subject: IB/srpt: Introduce target_reverse_dma_direction() Use the function target_reverse_dma_direction() instead of reimplementing it. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 6d1a137..5da59f4 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -96,19 +96,6 @@ static int srpt_queue_status(struct se_cmd *cmd); static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); -/** - * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. - */ -static inline -enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir) -{ - switch (dir) { - case DMA_TO_DEVICE: return DMA_FROM_DEVICE; - case DMA_FROM_DEVICE: return DMA_TO_DEVICE; - default: return dir; - } -} - static enum rdma_ch_state srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state) { @@ -1049,7 +1036,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, dir = ioctx->cmd.data_direction; BUG_ON(dir == DMA_NONE); ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt, - opposite_dma_dir(dir)); + target_reverse_dma_direction(&ioctx->cmd)); ioctx->mapped_sg_count = 0; } } @@ -1086,7 +1073,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, ioctx->sg_cnt = sg_cnt = cmd->t_data_nents; count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt, - opposite_dma_dir(dir)); + target_reverse_dma_direction(cmd)); if (unlikely(!count)) return -EAGAIN; -- cgit v0.10.2 From e1dd413ccff7a35c4d8b14781668ed27bae64823 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:05:19 -0800 Subject: IB/srpt: Use scsilun_to_int() Just like other target drivers, use scsilun_to_int() to unpack SCSI LUN numbers. This patch only changes the behavior of ib_srpt for LUN numbers >= 16384. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 5da59f4..716f429 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1494,80 +1494,6 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, return resp_len; } -#define NO_SUCH_LUN ((uint64_t)-1LL) - -/* - * SCSI LUN addressing method. See also SAM-2 and the section about - * eight byte LUNs. 
- */ -enum scsi_lun_addr_method { - SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0, - SCSI_LUN_ADDR_METHOD_FLAT = 1, - SCSI_LUN_ADDR_METHOD_LUN = 2, - SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3, -}; - -/* - * srpt_unpack_lun() - Convert from network LUN to linear LUN. - * - * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte - * order (big endian) to a linear LUN. Supports three LUN addressing methods: - * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40). - */ -static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) -{ - uint64_t res = NO_SUCH_LUN; - int addressing_method; - - if (unlikely(len < 2)) { - pr_err("Illegal LUN length %d, expected 2 bytes or more\n", - len); - goto out; - } - - switch (len) { - case 8: - if ((*((__be64 *)lun) & - cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) - goto out_err; - break; - case 4: - if (*((__be16 *)&lun[2]) != 0) - goto out_err; - break; - case 6: - if (*((__be32 *)&lun[2]) != 0) - goto out_err; - break; - case 2: - break; - default: - goto out_err; - } - - addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */ - switch (addressing_method) { - case SCSI_LUN_ADDR_METHOD_PERIPHERAL: - case SCSI_LUN_ADDR_METHOD_FLAT: - case SCSI_LUN_ADDR_METHOD_LUN: - res = *(lun + 1) | (((*lun) & 0x3f) << 8); - break; - - case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN: - default: - pr_err("Unimplemented LUN addressing method %u\n", - addressing_method); - break; - } - -out: - return res; - -out_err: - pr_err("Support for multi-level LUNs has not yet been implemented\n"); - goto out; -} - static int srpt_check_stop_free(struct se_cmd *cmd) { struct srpt_send_ioctx *ioctx = container_of(cmd, @@ -1585,7 +1511,6 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, { struct se_cmd *cmd; struct srp_cmd *srp_cmd; - uint64_t unpacked_lun; u64 data_len; enum dma_data_direction dir; sense_reason_t ret; @@ -1620,11 +1545,10 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, goto send_sense; } - unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun, - sizeof(srp_cmd->lun)); rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb, - &send_ioctx->sense_data[0], unpacked_lun, data_len, - TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); + &send_ioctx->sense_data[0], + scsilun_to_int(&srp_cmd->lun), data_len, + TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); if (rc != 0) { ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto send_sense; @@ -1669,7 +1593,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, struct srp_tsk_mgmt *srp_tsk; struct se_cmd *cmd; struct se_session *sess = ch->sess; - uint64_t unpacked_lun; int tcm_tmr; int rc; @@ -1685,11 +1608,10 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); send_ioctx->cmd.tag = srp_tsk->tag; tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); - unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun, - sizeof(srp_tsk->lun)); - rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun, - srp_tsk, tcm_tmr, GFP_KERNEL, srp_tsk->task_tag, - TARGET_SCF_ACK_KREF); + rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, + scsilun_to_int(&srp_tsk->lun), srp_tsk, tcm_tmr, + GFP_KERNEL, srp_tsk->task_tag, + TARGET_SCF_ACK_KREF); if (rc != 0) { send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; goto fail; -- cgit v0.10.2 From f130c2205dba8f02372c794c6c4e6cc4906376f4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:05:38 -0800 Subject: IB/srpt: Simplify channel state management The only allowed channel state 
changes are those that change the channel state into a state with a higher numerical value. This allows to merge the functions srpt_set_ch_state() and srpt_test_and_set_ch_state() into a single function. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 716f429..863fdd1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -96,37 +96,25 @@ static int srpt_queue_status(struct se_cmd *cmd); static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); -static enum rdma_ch_state -srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state) -{ - unsigned long flags; - enum rdma_ch_state prev; - - spin_lock_irqsave(&ch->spinlock, flags); - prev = ch->state; - ch->state = new_state; - spin_unlock_irqrestore(&ch->spinlock, flags); - return prev; -} - -/** - * srpt_test_and_set_ch_state() - Test and set the channel state. - * - * Returns true if and only if the channel state has been set to the new state. +/* + * The only allowed channel state changes are those that change the channel + * state into a state with a higher numerical value. Hence the new > prev test. */ -static bool -srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old, - enum rdma_ch_state new) +static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new) { unsigned long flags; enum rdma_ch_state prev; + bool changed = false; spin_lock_irqsave(&ch->spinlock, flags); prev = ch->state; - if (prev == old) + if (new > prev) { ch->state = new; + changed = true; + } spin_unlock_irqrestore(&ch->spinlock, flags); - return prev == old; + + return changed; } /** @@ -199,8 +187,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) ib_cm_notify(ch->cm_id, event->event); break; case IB_EVENT_QP_LAST_WQE_REACHED: - if (srpt_test_and_set_ch_state(ch, CH_DRAINING, - CH_RELEASING)) + if (srpt_set_ch_state(ch, CH_RELEASING)) srpt_release_channel(ch); else pr_debug("%s: state %d - ignored LAST_WQE.\n", @@ -1947,12 +1934,7 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id) spin_lock_irq(&sdev->spinlock); list_for_each_entry(ch, &sdev->rch_list, list) { if (ch->cm_id == cm_id) { - do_reset = srpt_test_and_set_ch_state(ch, - CH_CONNECTING, CH_DRAINING) || - srpt_test_and_set_ch_state(ch, - CH_LIVE, CH_DRAINING) || - srpt_test_and_set_ch_state(ch, - CH_DISCONNECTING, CH_DRAINING); + do_reset = srpt_set_ch_state(ch, CH_DRAINING); break; } } @@ -2353,7 +2335,7 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) ch = srpt_find_channel(cm_id->context, cm_id); BUG_ON(!ch); - if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) { + if (srpt_set_ch_state(ch, CH_LIVE)) { struct srpt_recv_ioctx *ioctx, *ioctx_tmp; ret = srpt_ch_qp_rts(ch, ch->qp); -- cgit v0.10.2 From 88936259c6a42c98e23181c9e1bba4b0bddb966a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:05:58 -0800 Subject: IB/srpt: Simplify srpt_shutdown_session() The target core guarantees that shutdown_session() is only invoked once per session. This means that the ib_srpt target driver doesn't have to track whether or not shutdown_session() has been called. 
Additionally, ensure that target_sess_cmd_list_set_waiting() is called before target_wait_for_sess_cmds() by moving it into srpt_release_channel_work(). Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 863fdd1..bffca47 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1892,20 +1892,7 @@ static void srpt_close_ch(struct srpt_rdma_ch *ch) */ static int srpt_shutdown_session(struct se_session *se_sess) { - struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; - unsigned long flags; - - spin_lock_irqsave(&ch->spinlock, flags); - if (ch->in_shutdown) { - spin_unlock_irqrestore(&ch->spinlock, flags); - return true; - } - - ch->in_shutdown = true; - target_sess_cmd_list_set_waiting(se_sess); - spin_unlock_irqrestore(&ch->spinlock, flags); - - return true; + return 1; } /** @@ -2008,6 +1995,7 @@ static void srpt_release_channel_work(struct work_struct *w) se_sess = ch->sess; BUG_ON(!se_sess); + target_sess_cmd_list_set_waiting(se_sess); target_wait_for_sess_cmds(se_sess); transport_deregister_session_configfs(se_sess); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index b0ede97..9c326c7 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -286,7 +286,6 @@ struct srpt_rdma_ch { u8 sess_name[36]; struct work_struct release_work; struct completion *release_done; - bool in_shutdown; }; /** -- cgit v0.10.2 From f108f0f66a1f01ab077a88b1da2e2f092acd4b14 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:06:14 -0800 Subject: IB/srpt: Fix srpt_close_session() Avoid that srpt_close_session() waits if it doesn't have to wait. Additionally, increase the time during which srpt_close_session() waits until closing a session has finished. This makes it easier to detect session shutdown bugs. 
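The reworked srpt_close_session() (see the hunk below) waits only when the channel is still listed on the device and then retries a long wait_for_completion_timeout() in a loop, so a stuck release produces periodic log messages instead of a single silent timeout. A minimal kernel-context sketch of that pattern, using a hypothetical helper name:

        /*
         * Sketch only: skip the wait when there is nothing to wait for, and
         * retry a long timeout so a stuck release keeps logging. Mirrors the
         * pattern adopted by srpt_close_session() below.
         */
        static void wait_for_ch_release(struct completion *release_done, bool needed)
        {
                if (!needed)
                        return;

                while (wait_for_completion_timeout(release_done, 180 * HZ) == 0)
                        pr_info("%s: still waiting ...\n", __func__);
        }
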
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index bffca47..d644606 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1986,8 +1986,8 @@ static void srpt_release_channel_work(struct work_struct *w) struct se_session *se_sess; ch = container_of(w, struct srpt_rdma_ch, release_work); - pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess, - ch->release_done); + pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name, + ch->qp->qp_num, ch->release_done); sdev = ch->sport->sdev; BUG_ON(!sdev); @@ -2011,11 +2011,10 @@ static void srpt_release_channel_work(struct work_struct *w) ch->rsp_size, DMA_TO_DEVICE); spin_lock_irq(&sdev->spinlock); - list_del(&ch->list); - spin_unlock_irq(&sdev->spinlock); - + list_del_init(&ch->list); if (ch->release_done) complete(ch->release_done); + spin_unlock_irq(&sdev->spinlock); wake_up(&sdev->ch_releaseQ); @@ -3025,24 +3024,26 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) static void srpt_close_session(struct se_session *se_sess) { DECLARE_COMPLETION_ONSTACK(release_done); - struct srpt_rdma_ch *ch; - struct srpt_device *sdev; - unsigned long res; - - ch = se_sess->fabric_sess_ptr; - WARN_ON(ch->sess != se_sess); + struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; + struct srpt_device *sdev = ch->sport->sdev; + bool wait; - pr_debug("ch %p state %d\n", ch, ch->state); + pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, + ch->state); - sdev = ch->sport->sdev; spin_lock_irq(&sdev->spinlock); BUG_ON(ch->release_done); ch->release_done = &release_done; + wait = !list_empty(&ch->list); __srpt_close_ch(ch); spin_unlock_irq(&sdev->spinlock); - res = wait_for_completion_timeout(&release_done, 60 * HZ); - WARN_ON(res == 0); + if (!wait) + return; + + while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0) + pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, + ch->sess_name, ch->qp->qp_num, ch->state); } /** -- cgit v0.10.2 From 2c7f37ff1c33087b8a4567730ebb128e8572da5e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:06:55 -0800 Subject: IB/srpt: Fix srpt_handle_cmd() error paths The target core function that should be called if target_submit_cmd() fails is target_put_sess_cmd(). Additionally, change the return type of srpt_handle_cmd() from int into void. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Alex Estrin Cc: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index d644606..5b0fbc1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -91,6 +91,7 @@ MODULE_PARM_DESC(srpt_service_guid, " instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; +static void srpt_release_cmd(struct se_cmd *se_cmd); static void srpt_release_channel(struct srpt_rdma_ch *ch); static int srpt_queue_status(struct se_cmd *cmd); static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); @@ -1492,15 +1493,14 @@ static int srpt_check_stop_free(struct se_cmd *cmd) /** * srpt_handle_cmd() - Process SRP_CMD. 
*/ -static int srpt_handle_cmd(struct srpt_rdma_ch *ch, - struct srpt_recv_ioctx *recv_ioctx, - struct srpt_send_ioctx *send_ioctx) +static void srpt_handle_cmd(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) { struct se_cmd *cmd; struct srp_cmd *srp_cmd; u64 data_len; enum dma_data_direction dir; - sense_reason_t ret; int rc; BUG_ON(!send_ioctx); @@ -1528,8 +1528,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) { pr_err("0x%llx: parsing SRP descriptor table failed.\n", srp_cmd->tag); - ret = TCM_INVALID_CDB_FIELD; - goto send_sense; + goto release_ioctx; } rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb, @@ -1537,14 +1536,15 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, scsilun_to_int(&srp_cmd->lun), data_len, TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); if (rc != 0) { - ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - goto send_sense; + pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc, + srp_cmd->tag); + goto release_ioctx; } - return 0; + return; -send_sense: - transport_send_check_condition_and_sense(cmd, ret, 0); - return -1; +release_ioctx: + send_ioctx->state = SRPT_STATE_DONE; + srpt_release_cmd(cmd); } static int srp_tmr_to_tcm(int fn) -- cgit v0.10.2 From 49f40163b66e02acdcdfdd2bf46239683d65806d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:07:11 -0800 Subject: IB/srpt: Fix how aborted commands are processed srpt_abort_cmd() must not be called in state SRPT_STATE_DATA_IN. Issue a warning if this occurs. srpt_abort_cmd() must not invoke target_put_sess_cmd() for commands in state SRPT_STATE_DONE because the srpt_abort_cmd() callers already do this when necessary. Hence remove this call. If an RDMA read fails the corresponding SCSI command must fail. Hence add a transport_generic_request_failure() call. Remove an incorrect srpt_abort_cmd() call from srpt_rdma_write_done(). Avoid that srpt_send_done() calls srpt_abort_cmd() for finished SCSI commands. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Alex Estrin Cc: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 5b0fbc1..64e9107 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1267,10 +1267,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) /* * If the command is in a state where the target core is waiting for - * the ib_srpt driver, change the state to the next state. Changing - * the state of the command from SRPT_STATE_NEED_DATA to - * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this - * function a second time. + * the ib_srpt driver, change the state to the next state. 
*/ spin_lock_irqsave(&ioctx->spinlock, flags); @@ -1279,25 +1276,17 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) case SRPT_STATE_NEED_DATA: ioctx->state = SRPT_STATE_DATA_IN; break; - case SRPT_STATE_DATA_IN: case SRPT_STATE_CMD_RSP_SENT: case SRPT_STATE_MGMT_RSP_SENT: ioctx->state = SRPT_STATE_DONE; break; default: + WARN_ONCE(true, "%s: unexpected I/O context state %d\n", + __func__, state); break; } spin_unlock_irqrestore(&ioctx->spinlock, flags); - if (state == SRPT_STATE_DONE) { - struct srpt_rdma_ch *ch = ioctx->ch; - - BUG_ON(ch->sess == NULL); - - target_put_sess_cmd(&ioctx->cmd); - goto out; - } - pr_debug("Aborting cmd with state %d and tag %lld\n", state, ioctx->cmd.tag); @@ -1305,19 +1294,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) case SRPT_STATE_NEW: case SRPT_STATE_DATA_IN: case SRPT_STATE_MGMT: + case SRPT_STATE_DONE: /* * Do nothing - defer abort processing until * srpt_queue_response() is invoked. */ - WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false)); break; case SRPT_STATE_NEED_DATA: - /* DMA_TO_DEVICE (write) - RDMA read error. */ - - /* XXX(hch): this is a horrible layering violation.. */ - spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state &= ~CMD_T_ACTIVE; - spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); + pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag); + transport_generic_request_failure(&ioctx->cmd, + TCM_CHECK_CONDITION_ABORT_CMD); break; case SRPT_STATE_CMD_RSP_SENT: /* @@ -1325,18 +1311,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. */ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - target_put_sess_cmd(&ioctx->cmd); + transport_generic_free_cmd(&ioctx->cmd, 0); break; case SRPT_STATE_MGMT_RSP_SENT: - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(&ioctx->cmd); + transport_generic_free_cmd(&ioctx->cmd, 0); break; default: WARN(1, "Unexpected command state (%d)", state); break; } -out: return state; } @@ -1376,9 +1360,14 @@ static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); if (unlikely(wc->status != IB_WC_SUCCESS)) { + /* + * Note: if an RDMA write error completion is received that + * means that a SEND also has been posted. Defer further + * processing of the associated command until the send error + * completion has been received. + */ pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n", ioctx, wc->status); - srpt_abort_cmd(ioctx); } } @@ -1721,15 +1710,10 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) atomic_inc(&ch->sq_wr_avail); - if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_SUCCESS) pr_info("sending response for ioctx 0x%p failed" " with status %d\n", ioctx, wc->status); - atomic_dec(&ch->req_lim); - srpt_abort_cmd(ioctx); - goto out; - } - if (state != SRPT_STATE_DONE) { srpt_unmap_sg_to_ib_sge(ch, ioctx); transport_generic_free_cmd(&ioctx->cmd, 0); @@ -1738,7 +1722,6 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) " wr_id = %u.\n", ioctx->ioctx.index); } -out: while (!list_empty(&ch->cmd_wait_list) && ch->state == CH_LIVE && (ioctx = srpt_get_send_ioctx(ch)) != NULL) { -- cgit v0.10.2 From 1e20a2a5107021144fd795bcb66a272fd80cb477 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:07:29 -0800 Subject: IB/srpt: Inline trivial CM callback functions Inline those CM callback functions that are only two lines long. 
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Alex Estrin Cc: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 64e9107..ea73594 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2320,18 +2320,6 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) } } -static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id) -{ - pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - -static void srpt_cm_rep_error(struct ib_cm_id *cm_id) -{ - pr_info("Received IB REP error for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - /** * srpt_cm_dreq_recv() - Process reception of a DREQ message. */ @@ -2370,15 +2358,6 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id) } /** - * srpt_cm_drep_recv() - Process reception of a DREP message. - */ -static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) -{ - pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); -} - -/** * srpt_cm_handler() - IB connection manager callback function. * * A non-zero return value will cause the caller destroy the CM ID. @@ -2409,22 +2388,26 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) srpt_cm_dreq_recv(cm_id); break; case IB_CM_DREP_RECEIVED: - srpt_cm_drep_recv(cm_id); + pr_info("Received CM DREP message for cm_id %p.\n", + cm_id); + srpt_drain_channel(cm_id); break; case IB_CM_TIMEWAIT_EXIT: - srpt_cm_timewait_exit(cm_id); + pr_info("Received CM TimeWait exit for cm_id %p.\n", cm_id); + srpt_drain_channel(cm_id); break; case IB_CM_REP_ERROR: - srpt_cm_rep_error(cm_id); + pr_info("Received CM REP error for cm_id %p.\n", cm_id); + srpt_drain_channel(cm_id); break; case IB_CM_DREQ_ERROR: - pr_info("Received IB DREQ ERROR event.\n"); + pr_info("Received CM DREQ ERROR event.\n"); break; case IB_CM_MRA_RECEIVED: - pr_info("Received IB MRA event\n"); + pr_info("Received CM MRA event\n"); break; default: - pr_err("received unrecognized IB CM event %d\n", event->event); + pr_err("received unrecognized CM event %d\n", event->event); break; } -- cgit v0.10.2 From 2739b592d360fd2031262c034f0f73f6f4b7c394 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:07:49 -0800 Subject: IB/srpt: Eliminate srpt_find_channel() In the CM REQ message handler, store the channel pointer in cm_id->context such that the function srpt_find_channel() is no longer needed. Additionally, make the CM event messages more informative. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index ea73594..10bc133 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1890,25 +1890,14 @@ static int srpt_shutdown_session(struct se_session *se_sess) * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until * this function has finished). 
*/ -static void srpt_drain_channel(struct ib_cm_id *cm_id) +static void srpt_drain_channel(struct srpt_rdma_ch *ch) { - struct srpt_device *sdev; - struct srpt_rdma_ch *ch; int ret; bool do_reset = false; WARN_ON_ONCE(irqs_disabled()); - sdev = cm_id->context; - BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); - list_for_each_entry(ch, &sdev->rch_list, list) { - if (ch->cm_id == cm_id) { - do_reset = srpt_set_ch_state(ch, CH_DRAINING); - break; - } - } - spin_unlock_irq(&sdev->spinlock); + do_reset = srpt_set_ch_state(ch, CH_DRAINING); if (do_reset) { if (ch->sess) @@ -1922,34 +1911,6 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id) } /** - * srpt_find_channel() - Look up an RDMA channel. - * @cm_id: Pointer to the CM ID of the channel to be looked up. - * - * Return NULL if no matching RDMA channel has been found. - */ -static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev, - struct ib_cm_id *cm_id) -{ - struct srpt_rdma_ch *ch; - bool found; - - WARN_ON_ONCE(irqs_disabled()); - BUG_ON(!sdev); - - found = false; - spin_lock_irq(&sdev->spinlock); - list_for_each_entry(ch, &sdev->rch_list, list) { - if (ch->cm_id == cm_id) { - found = true; - break; - } - } - spin_unlock_irq(&sdev->spinlock); - - return found ? ch : NULL; -} - -/** * srpt_release_channel() - Release channel resources. * * Schedules the actual release because: @@ -2132,6 +2093,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, memcpy(ch->t_port_id, req->target_port_id, 16); ch->sport = &sdev->port[param->port - 1]; ch->cm_id = cm_id; + cm_id->context = ch; /* * Avoid QUEUE_FULL conditions by limiting the number of buffers used * for the SRP protocol to the command queue size. @@ -2285,10 +2247,14 @@ out: return ret; } -static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) +static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, + enum ib_cm_rej_reason reason, + const u8 *private_data, + u8 private_data_len) { - pr_info("Received IB REJ for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); + pr_info("Received CM REJ for ch %s-%d; reason %d.\n", + ch->sess_name, ch->qp->qp_num, reason); + srpt_drain_channel(ch); } /** @@ -2297,14 +2263,10 @@ static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) * An IB_CM_RTU_RECEIVED message indicates that the connection is established * and that the recipient may begin transmitting (RTU = ready to use). */ -static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) +static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) { - struct srpt_rdma_ch *ch; int ret; - ch = srpt_find_channel(cm_id->context, cm_id); - BUG_ON(!ch); - if (srpt_set_ch_state(ch, CH_LIVE)) { struct srpt_recv_ioctx *ioctx, *ioctx_tmp; @@ -2323,16 +2285,13 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) /** * srpt_cm_dreq_recv() - Process reception of a DREQ message. 
*/ -static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id) +static void srpt_cm_dreq_recv(struct srpt_rdma_ch *ch) { - struct srpt_rdma_ch *ch; unsigned long flags; bool send_drep = false; - ch = srpt_find_channel(cm_id->context, cm_id); - BUG_ON(!ch); - - pr_debug("cm_id= %p ch->state= %d\n", cm_id, ch->state); + pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, + ch->state); spin_lock_irqsave(&ch->spinlock, flags); switch (ch->state) { @@ -2369,6 +2328,7 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id) */ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { + struct srpt_rdma_ch *ch = cm_id->context; int ret; ret = 0; @@ -2378,27 +2338,31 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) event->private_data); break; case IB_CM_REJ_RECEIVED: - srpt_cm_rej_recv(cm_id); + srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason, + event->private_data, + IB_CM_REJ_PRIVATE_DATA_SIZE); break; case IB_CM_RTU_RECEIVED: case IB_CM_USER_ESTABLISHED: - srpt_cm_rtu_recv(cm_id); + srpt_cm_rtu_recv(ch); break; case IB_CM_DREQ_RECEIVED: - srpt_cm_dreq_recv(cm_id); + srpt_cm_dreq_recv(ch); break; case IB_CM_DREP_RECEIVED: - pr_info("Received CM DREP message for cm_id %p.\n", - cm_id); - srpt_drain_channel(cm_id); + pr_info("Received CM DREP message for ch %s-%d.\n", + ch->sess_name, ch->qp->qp_num); + srpt_drain_channel(ch); break; case IB_CM_TIMEWAIT_EXIT: - pr_info("Received CM TimeWait exit for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); + pr_info("Received CM TimeWait exit for ch %s-%d.\n", + ch->sess_name, ch->qp->qp_num); + srpt_drain_channel(ch); break; case IB_CM_REP_ERROR: - pr_info("Received CM REP error for cm_id %p.\n", cm_id); - srpt_drain_channel(cm_id); + pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name, + ch->qp->qp_num); + srpt_drain_channel(ch); break; case IB_CM_DREQ_ERROR: pr_info("Received CM DREQ ERROR event.\n"); -- cgit v0.10.2 From c13c90ea6762d834a07c0aa18134d98794f6fc3d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:08:12 -0800 Subject: IB/srpt: Log private data associated with REJ To make it possible to determine why an initiator sent a REJ, log the private data associated with the received REJ packet. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 10bc133..0bf204e 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2252,8 +2252,18 @@ static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, const u8 *private_data, u8 private_data_len) { - pr_info("Received CM REJ for ch %s-%d; reason %d.\n", - ch->sess_name, ch->qp->qp_num, reason); + char *priv = NULL; + int i; + + if (private_data_len && (priv = kmalloc(private_data_len * 3 + 1, + GFP_KERNEL))) { + for (i = 0; i < private_data_len; i++) + sprintf(priv + 3 * i, " %02x", private_data[i]); + } + pr_info("Received CM REJ for ch %s-%d; reason %d%s%s.\n", + ch->sess_name, ch->qp->qp_num, reason, private_data_len ? + "; private data" : "", priv ? priv : " (?)"); + kfree(priv); srpt_drain_channel(ch); } -- cgit v0.10.2 From 8628991fbe6a9086189f55f0b33dee7f25108ecc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:08:34 -0800 Subject: IB/srpt: Use a mutex to protect the channel list In a later patch a function that can block will be called while iterating over the rch_list. 
Hence protect that list with a mutex instead of a spinlock. And since it is not allowed to sleep while the task state != TASK_RUNNING, convert the list test in srpt_ch_list_empty() into a lockless test. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0bf204e..33bd408 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1862,12 +1862,11 @@ static void __srpt_close_ch(struct srpt_rdma_ch *ch) */ static void srpt_close_ch(struct srpt_rdma_ch *ch) { - struct srpt_device *sdev; + struct srpt_device *sdev = ch->sport->sdev; - sdev = ch->sport->sdev; - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); } /** @@ -1954,11 +1953,11 @@ static void srpt_release_channel_work(struct work_struct *w) ch->sport->sdev, ch->rq_size, ch->rsp_size, DMA_TO_DEVICE); - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); list_del_init(&ch->list); if (ch->release_done) complete(ch->release_done); - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); wake_up(&sdev->ch_releaseQ); @@ -2039,7 +2038,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN; - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) { if (!memcmp(ch->i_port_id, req->initiator_port_id, 16) @@ -2063,7 +2062,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } } - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); } else rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; @@ -2208,9 +2207,9 @@ try_again: goto release_channel; } - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); list_add_tail(&ch->list, &sdev->rch_list); - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); goto out; @@ -2653,17 +2652,6 @@ static void srpt_refresh_port_work(struct work_struct *work) srpt_refresh_port(sport); } -static int srpt_ch_list_empty(struct srpt_device *sdev) -{ - int res; - - spin_lock_irq(&sdev->spinlock); - res = list_empty(&sdev->rch_list); - spin_unlock_irq(&sdev->spinlock); - - return res; -} - /** * srpt_release_sdev() - Free the channel resources associated with a target. 
*/ @@ -2676,13 +2664,13 @@ static int srpt_release_sdev(struct srpt_device *sdev) BUG_ON(!sdev); - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); res = wait_event_interruptible(sdev->ch_releaseQ, - srpt_ch_list_empty(sdev)); + list_empty_careful(&sdev->rch_list)); if (res) pr_err("%s: interrupted.\n", __func__); @@ -2743,7 +2731,7 @@ static void srpt_add_one(struct ib_device *device) sdev->device = device; INIT_LIST_HEAD(&sdev->rch_list); init_waitqueue_head(&sdev->ch_releaseQ); - spin_lock_init(&sdev->spinlock); + mutex_init(&sdev->mutex); sdev->pd = ib_alloc_pd(device); if (IS_ERR(sdev->pd)) @@ -2971,12 +2959,12 @@ static void srpt_close_session(struct se_session *se_sess) pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, ch->state); - spin_lock_irq(&sdev->spinlock); + mutex_lock(&sdev->mutex); BUG_ON(ch->release_done); ch->release_done = &release_done; wait = !list_empty(&ch->list); __srpt_close_ch(ch); - spin_unlock_irq(&sdev->spinlock); + mutex_unlock(&sdev->mutex); if (!wait) return; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 9c326c7..5883295 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -342,7 +342,7 @@ struct srpt_port { * @ioctx_ring: Per-HCA SRQ. * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. * @ch_releaseQ: Enables waiting for removal from rch_list. - * @spinlock: Protects rch_list and tpg. + * @mutex: Protects rch_list. * @port: Information about the ports owned by this HCA. * @event_handler: Per-HCA asynchronous IB event handler. * @list: Node in srpt_dev_list. @@ -356,7 +356,7 @@ struct srpt_device { struct srpt_recv_ioctx **ioctx_ring; struct list_head rch_list; wait_queue_head_t ch_releaseQ; - spinlock_t spinlock; + struct mutex mutex; struct srpt_port port[2]; struct ib_event_handler event_handler; struct list_head list; -- cgit v0.10.2 From aaf45bd83eba804adfa5c4ff5b17ca0c88884c6f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:08:53 -0800 Subject: IB/srpt: Detect session shutdown reliably The Last WQE Reached event is only generated after one or more work requests have been queued on the QP associated with a session. Since session shutdown can start before any work requests have been queued, use a zero-length RDMA write to wait until a QP has been drained. Additionally, rework the code for closing and disconnecting a session. 
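The drain technique relies on IB spec C9-88: for a zero-length RDMA WRITE the R_Key is not validated, so the write itself is harmless and exists only to generate a completion once the QP has been flushed. The core of the helper added in the hunks below, shown condensed; its completion handler (srpt_zerolength_write_done()) then marks the channel CH_DISCONNECTED and schedules the release work:

        /*
         * Condensed from srpt_zerolength_write() below: the QP is first moved
         * to the error state, then a signalled zero-length RDMA WRITE is
         * posted; the resulting (flush) completion marks the drain point.
         */
        static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
        {
                struct ib_send_wr wr, *bad_wr;

                memset(&wr, 0, sizeof(wr));
                wr.opcode = IB_WR_RDMA_WRITE;
                wr.wr_cqe = &ch->zw_cqe;        /* routed to srpt_zerolength_write_done() */
                wr.send_flags = IB_SEND_SIGNALED;
                return ib_post_send(ch->qp, &wr, &bad_wr);
        }
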
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 33bd408..0881ae9 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -92,10 +92,11 @@ MODULE_PARM_DESC(srpt_service_guid, static struct ib_client srpt_client; static void srpt_release_cmd(struct se_cmd *se_cmd); -static void srpt_release_channel(struct srpt_rdma_ch *ch); +static void srpt_free_ch(struct kref *kref); static int srpt_queue_status(struct se_cmd *cmd); static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); +static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc); /* * The only allowed channel state changes are those that change the channel @@ -175,6 +176,23 @@ static void srpt_srq_event(struct ib_event *event, void *ctx) pr_info("SRQ event %d\n", event->event); } +static const char *get_ch_state_name(enum rdma_ch_state s) +{ + switch (s) { + case CH_CONNECTING: + return "connecting"; + case CH_LIVE: + return "live"; + case CH_DISCONNECTING: + return "disconnecting"; + case CH_DRAINING: + return "draining"; + case CH_DISCONNECTED: + return "disconnected"; + } + return "???"; +} + /** * srpt_qp_event() - QP event callback function. */ @@ -188,11 +206,9 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) ib_cm_notify(ch->cm_id, event->event); break; case IB_EVENT_QP_LAST_WQE_REACHED: - if (srpt_set_ch_state(ch, CH_RELEASING)) - srpt_release_channel(ch); - else - pr_debug("%s: state %d - ignored LAST_WQE.\n", - ch->sess_name, ch->state); + pr_debug("%s-%d, state %s: received Last WQE event.\n", + ch->sess_name, ch->qp->qp_num, + get_ch_state_name(ch->state)); break; default: pr_err("received unrecognized IB QP event %d\n", event->event); @@ -795,6 +811,37 @@ out: } /** + * srpt_zerolength_write() - Perform a zero-length RDMA write. + * + * A quote from the InfiniBand specification: C9-88: For an HCA responder + * using Reliable Connection service, for each zero-length RDMA READ or WRITE + * request, the R_Key shall not be validated, even if the request includes + * Immediate data. + */ +static int srpt_zerolength_write(struct srpt_rdma_ch *ch) +{ + struct ib_send_wr wr, *bad_wr; + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_RDMA_WRITE; + wr.wr_cqe = &ch->zw_cqe; + wr.send_flags = IB_SEND_SIGNALED; + return ib_post_send(ch->qp, &wr, &bad_wr); +} + +static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srpt_rdma_ch *ch = cq->cq_context; + + WARN(wc->status == IB_WC_SUCCESS, "%s-%d: QP not in error state\n", + ch->sess_name, ch->qp->qp_num); + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num); +} + +/** * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. * @ioctx: Pointer to the I/O context associated with the request. * @srp_cmd: Pointer to the SRP_CMD request data. @@ -1816,110 +1863,102 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) } /** - * __srpt_close_ch() - Close an RDMA channel by setting the QP error state. + * srpt_close_ch() - Close an RDMA channel. * - * Reset the QP and make sure all resources associated with the channel will - * be deallocated at an appropriate time. 
+ * Make sure all resources associated with the channel will be deallocated at + * an appropriate time. * - * Note: The caller must hold ch->sport->sdev->spinlock. + * Returns true if and only if the channel state has been modified into + * CH_DRAINING. */ -static void __srpt_close_ch(struct srpt_rdma_ch *ch) +static bool srpt_close_ch(struct srpt_rdma_ch *ch) { - enum rdma_ch_state prev_state; - unsigned long flags; + int ret; - spin_lock_irqsave(&ch->spinlock, flags); - prev_state = ch->state; - switch (prev_state) { - case CH_CONNECTING: - case CH_LIVE: - ch->state = CH_DISCONNECTING; - break; - default: - break; + if (!srpt_set_ch_state(ch, CH_DRAINING)) { + pr_debug("%s-%d: already closed\n", ch->sess_name, + ch->qp->qp_num); + return false; } - spin_unlock_irqrestore(&ch->spinlock, flags); - switch (prev_state) { - case CH_CONNECTING: - ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0, - NULL, 0); - /* fall through */ - case CH_LIVE: - if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0) - pr_err("sending CM DREQ failed.\n"); - break; - case CH_DISCONNECTING: - break; - case CH_DRAINING: - case CH_RELEASING: - break; - } -} + kref_get(&ch->kref); -/** - * srpt_close_ch() - Close an RDMA channel. - */ -static void srpt_close_ch(struct srpt_rdma_ch *ch) -{ - struct srpt_device *sdev = ch->sport->sdev; + ret = srpt_ch_qp_err(ch); + if (ret < 0) + pr_err("%s-%d: changing queue pair into error state failed: %d\n", + ch->sess_name, ch->qp->qp_num, ret); - mutex_lock(&sdev->mutex); - __srpt_close_ch(ch); - mutex_unlock(&sdev->mutex); -} + pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, + ch->qp->qp_num); + ret = srpt_zerolength_write(ch); + if (ret < 0) { + pr_err("%s-%d: queuing zero-length write failed: %d\n", + ch->sess_name, ch->qp->qp_num, ret); + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + WARN_ON_ONCE(true); + } -/** - * srpt_shutdown_session() - Whether or not a session may be shut down. - */ -static int srpt_shutdown_session(struct se_session *se_sess) -{ - return 1; + kref_put(&ch->kref, srpt_free_ch); + + return true; } -/** - * srpt_drain_channel() - Drain a channel by resetting the IB queue pair. - * @cm_id: Pointer to the CM ID of the channel to be drained. - * - * Note: Must be called from inside srpt_cm_handler to avoid a race between - * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one() - * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one() - * waits until all target sessions for the associated IB device have been - * unregistered and target session registration involves a call to - * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until - * this function has finished). +/* + * Change the channel state into CH_DISCONNECTING. If a channel has not yet + * reached the connected state, close it. If a channel is in the connected + * state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is + * the responsibility of the caller to ensure that this function is not + * invoked concurrently with the code that accepts a connection. This means + * that this function must either be invoked from inside a CM callback + * function or that it must be invoked with the srpt_port.mutex held. 
*/ -static void srpt_drain_channel(struct srpt_rdma_ch *ch) +static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) { int ret; - bool do_reset = false; - WARN_ON_ONCE(irqs_disabled()); + if (!srpt_set_ch_state(ch, CH_DISCONNECTING)) + return -ENOTCONN; + + ret = ib_send_cm_dreq(ch->cm_id, NULL, 0); + if (ret < 0) + ret = ib_send_cm_drep(ch->cm_id, NULL, 0); + + if (ret < 0 && srpt_close_ch(ch)) + ret = 0; + + return ret; +} - do_reset = srpt_set_ch_state(ch, CH_DRAINING); +static void __srpt_close_all_ch(struct srpt_device *sdev) +{ + struct srpt_rdma_ch *ch; - if (do_reset) { - if (ch->sess) - srpt_shutdown_session(ch->sess); + lockdep_assert_held(&sdev->mutex); - ret = srpt_ch_qp_err(ch); - if (ret < 0) - pr_err("Setting queue pair in error state" - " failed: %d\n", ret); + list_for_each_entry(ch, &sdev->rch_list, list) { + if (srpt_disconnect_ch(ch) >= 0) + pr_info("Closing channel %s-%d because target %s has been disabled\n", + ch->sess_name, ch->qp->qp_num, + sdev->device->name); + srpt_close_ch(ch); } } /** - * srpt_release_channel() - Release channel resources. - * - * Schedules the actual release because: - * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would - * trigger a deadlock. - * - It is not safe to call TCM transport_* functions from interrupt context. + * srpt_shutdown_session() - Whether or not a session may be shut down. */ -static void srpt_release_channel(struct srpt_rdma_ch *ch) +static int srpt_shutdown_session(struct se_session *se_sess) +{ + return 1; +} + +static void srpt_free_ch(struct kref *kref) { - schedule_work(&ch->release_work); + struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); + + kfree(ch); } static void srpt_release_channel_work(struct work_struct *w) @@ -1961,7 +2000,7 @@ static void srpt_release_channel_work(struct work_struct *w) wake_up(&sdev->ch_releaseQ); - kfree(ch); + kref_put(&ch->kref, srpt_free_ch); } /** @@ -2046,17 +2085,10 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, && param->port == ch->sport->port && param->listen_id == ch->sport->sdev->cm_id && ch->cm_id) { - if (ch->state != CH_CONNECTING - && ch->state != CH_LIVE) + if (srpt_disconnect_ch(ch) < 0) continue; - - /* found an existing channel */ - pr_debug("Found existing channel %s" - " cm_id= %p state= %d\n", - ch->sess_name, ch->cm_id, ch->state); - - __srpt_close_ch(ch); - + pr_info("Relogin - closed existing channel %s\n", + ch->sess_name); rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED; } @@ -2087,6 +2119,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto reject; } + kref_init(&ch->kref); + ch->zw_cqe.done = srpt_zerolength_write_done; INIT_WORK(&ch->release_work, srpt_release_channel_work); memcpy(ch->i_port_id, req->initiator_port_id, 16); memcpy(ch->t_port_id, req->target_port_id, 16); @@ -2214,7 +2248,7 @@ try_again: goto out; release_channel: - srpt_set_ch_state(ch, CH_RELEASING); + srpt_disconnect_ch(ch); transport_deregister_session_configfs(ch->sess); transport_deregister_session(ch->sess); ch->sess = NULL; @@ -2263,7 +2297,6 @@ static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, ch->sess_name, ch->qp->qp_num, reason, private_data_len ? "; private data" : "", priv ? priv : " (?)"); kfree(priv); - srpt_drain_channel(ch); } /** @@ -2292,40 +2325,6 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) } /** - * srpt_cm_dreq_recv() - Process reception of a DREQ message. 
- */ -static void srpt_cm_dreq_recv(struct srpt_rdma_ch *ch) -{ - unsigned long flags; - bool send_drep = false; - - pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, - ch->state); - - spin_lock_irqsave(&ch->spinlock, flags); - switch (ch->state) { - case CH_CONNECTING: - case CH_LIVE: - send_drep = true; - ch->state = CH_DISCONNECTING; - break; - case CH_DISCONNECTING: - case CH_DRAINING: - case CH_RELEASING: - WARN(true, "unexpected channel state %d\n", ch->state); - break; - } - spin_unlock_irqrestore(&ch->spinlock, flags); - - if (send_drep) { - if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0) - pr_err("Sending IB DREP failed.\n"); - pr_info("Received DREQ and sent DREP for session %s.\n", - ch->sess_name); - } -} - -/** * srpt_cm_handler() - IB connection manager callback function. * * A non-zero return value will cause the caller destroy the CM ID. @@ -2356,22 +2355,21 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) srpt_cm_rtu_recv(ch); break; case IB_CM_DREQ_RECEIVED: - srpt_cm_dreq_recv(ch); + srpt_disconnect_ch(ch); break; case IB_CM_DREP_RECEIVED: pr_info("Received CM DREP message for ch %s-%d.\n", ch->sess_name, ch->qp->qp_num); - srpt_drain_channel(ch); + srpt_close_ch(ch); break; case IB_CM_TIMEWAIT_EXIT: pr_info("Received CM TimeWait exit for ch %s-%d.\n", ch->sess_name, ch->qp->qp_num); - srpt_drain_channel(ch); + srpt_close_ch(ch); break; case IB_CM_REP_ERROR: pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name, ch->qp->qp_num); - srpt_drain_channel(ch); break; case IB_CM_DREQ_ERROR: pr_info("Received CM DREQ ERROR event.\n"); @@ -2511,7 +2509,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) break; case CH_DISCONNECTING: case CH_DRAINING: - case CH_RELEASING: + case CH_DISCONNECTED: pr_debug("cmd with tag %lld: channel disconnecting\n", ioctx->cmd.tag); srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); @@ -2657,16 +2655,16 @@ static void srpt_refresh_port_work(struct work_struct *work) */ static int srpt_release_sdev(struct srpt_device *sdev) { - struct srpt_rdma_ch *ch, *tmp_ch; - int res; + int i, res; WARN_ON_ONCE(irqs_disabled()); BUG_ON(!sdev); mutex_lock(&sdev->mutex); - list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) - __srpt_close_ch(ch); + for (i = 0; i < ARRAY_SIZE(sdev->port); i++) + sdev->port[i].enabled = false; + __srpt_close_all_ch(sdev); mutex_unlock(&sdev->mutex); res = wait_event_interruptible(sdev->ch_releaseQ, @@ -2963,7 +2961,7 @@ static void srpt_close_session(struct se_session *se_sess) BUG_ON(ch->release_done); ch->release_done = &release_done; wait = !list_empty(&ch->list); - __srpt_close_ch(ch); + srpt_disconnect_ch(ch); mutex_unlock(&sdev->mutex); if (!wait) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 5883295..af9b8b5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -218,20 +218,20 @@ struct srpt_send_ioctx { /** * enum rdma_ch_state - SRP channel state. - * @CH_CONNECTING: QP is in RTR state; waiting for RTU. - * @CH_LIVE: QP is in RTS state. - * @CH_DISCONNECTING: DREQ has been received; waiting for DREP - * or DREQ has been send and waiting for DREP - * or . - * @CH_DRAINING: QP is in ERR state; waiting for last WQE event. - * @CH_RELEASING: Last WQE event has been received; releasing resources. + * @CH_CONNECTING: QP is in RTR state; waiting for RTU. + * @CH_LIVE: QP is in RTS state. 
+ * @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has + * been received. + * @CH_DRAINING: DREP has been received or waiting for DREP timed out + * and last work request has been queued. + * @CH_DISCONNECTED: Last completion has been received. */ enum rdma_ch_state { CH_CONNECTING, CH_LIVE, CH_DISCONNECTING, CH_DRAINING, - CH_RELEASING + CH_DISCONNECTED, }; /** @@ -267,6 +267,8 @@ struct srpt_rdma_ch { struct ib_cm_id *cm_id; struct ib_qp *qp; struct ib_cq *cq; + struct ib_cqe zw_cqe; + struct kref kref; int rq_size; u32 rsp_size; atomic_t sq_wr_avail; -- cgit v0.10.2 From fc3af58d3f8b820e3acabb1e1c4f105eb3f283c1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:09:10 -0800 Subject: IB/srpt: Fix srpt_write_pending() The only allowed return values for the write_pending() callback function are 0, -EAGAIN and -ENOMEM. Since attempting to perform RDMA over a disconnecting channel will result in an IB error completion anyway, remove the code that checks the channel state from srpt_write_pending(). Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0881ae9..12cc695 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2487,39 +2487,14 @@ static int srpt_write_pending_status(struct se_cmd *se_cmd) */ static int srpt_write_pending(struct se_cmd *se_cmd) { - struct srpt_rdma_ch *ch; - struct srpt_send_ioctx *ioctx; + struct srpt_send_ioctx *ioctx = + container_of(se_cmd, struct srpt_send_ioctx, cmd); + struct srpt_rdma_ch *ch = ioctx->ch; enum srpt_command_state new_state; - int ret; - - ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); WARN_ON(new_state == SRPT_STATE_DONE); - - ch = ioctx->ch; - BUG_ON(!ch); - - switch (ch->state) { - case CH_CONNECTING: - WARN(true, "unexpected channel state %d\n", ch->state); - ret = -EINVAL; - goto out; - case CH_LIVE: - break; - case CH_DISCONNECTING: - case CH_DRAINING: - case CH_DISCONNECTED: - pr_debug("cmd with tag %lld: channel disconnecting\n", - ioctx->cmd.tag); - srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); - ret = -EINVAL; - goto out; - } - ret = srpt_xfer_data(ch, ioctx); - -out: - return ret; + return srpt_xfer_data(ch, ioctx); } static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) -- cgit v0.10.2 From 043a6806a480ad11d6746b8bdd667b6c7bdc2d64 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:09:28 -0800 Subject: IB/srpt: Log out all initiators if a port is disabled If an initiator observes LUN deletion during shutdown of the target stack then that will trigger an I/O error even when using multipathd. Users need a way to avoid that shutting down the target stack causes I/O errors, e.g. by providing a way to force initiator logout. Hence close all sessions if a target port is disabled. 
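The hunk below implements this in srpt_tpg_enable_store(): disabling the port walks the per-device channel list under sdev->mutex and logs every matching session out. A condensed sketch of that loop (sdev, sport and ch are locals of the surrounding function):

        /* Condensed from the srpt_tpg_enable_store() hunk below. */
        mutex_lock(&sdev->mutex);
        list_for_each_entry(ch, &sdev->rch_list, list) {
                if (ch->sport == sport) {
                        srpt_disconnect_ch(ch);  /* send DREQ (or DREP) */
                        srpt_close_ch(ch);       /* move the QP to error and drain it */
                }
        }
        mutex_unlock(&sdev->mutex);
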
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Alex Estrin Cc: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 12cc695..cba2c1f 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3152,6 +3152,8 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, { struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + struct srpt_device *sdev = sport->sdev; + struct srpt_rdma_ch *ch; unsigned long tmp; int ret; @@ -3165,11 +3167,24 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp); return -EINVAL; } - if (tmp == 1) - sport->enabled = true; - else - sport->enabled = false; + if (sport->enabled == tmp) + goto out; + sport->enabled = tmp; + if (sport->enabled) + goto out; + mutex_lock(&sdev->mutex); + list_for_each_entry(ch, &sdev->rch_list, list) { + if (ch->sport == sport) { + pr_debug("%s: ch %p %s-%d\n", __func__, ch, + ch->sess_name, ch->qp->qp_num); + srpt_disconnect_ch(ch); + srpt_close_ch(ch); + } + } + mutex_unlock(&sdev->mutex); + +out: return count; } -- cgit v0.10.2 From 539b3248d7f5f0341e3797e734ba653bccfaf498 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:09:50 -0800 Subject: IB/srpt: Introduce srpt_process_wait_list() This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Alex Estrin Cc: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index cba2c1f..289e058 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1730,6 +1730,28 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) } } +/* + * This function must be called from the context in which RDMA completions are + * processed because it accesses the wait list without protection against + * access from other threads. 
+ */ +static void srpt_process_wait_list(struct srpt_rdma_ch *ch) +{ + struct srpt_send_ioctx *ioctx; + + while (!list_empty(&ch->cmd_wait_list) && + ch->state >= CH_LIVE && + (ioctx = srpt_get_send_ioctx(ch)) != NULL) { + struct srpt_recv_ioctx *recv_ioctx; + + recv_ioctx = list_first_entry(&ch->cmd_wait_list, + struct srpt_recv_ioctx, + wait_list); + list_del(&recv_ioctx->wait_list); + srpt_handle_new_iu(ch, recv_ioctx, ioctx); + } +} + /** * Note: Although this has not yet been observed during tests, at least in * theory it is possible that the srpt_get_send_ioctx() call invoked by @@ -1769,17 +1791,7 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) " wr_id = %u.\n", ioctx->ioctx.index); } - while (!list_empty(&ch->cmd_wait_list) && - ch->state == CH_LIVE && - (ioctx = srpt_get_send_ioctx(ch)) != NULL) { - struct srpt_recv_ioctx *recv_ioctx; - - recv_ioctx = list_first_entry(&ch->cmd_wait_list, - struct srpt_recv_ioctx, - wait_list); - list_del(&recv_ioctx->wait_list); - srpt_handle_new_iu(ch, recv_ioctx, ioctx); - } + srpt_process_wait_list(ch); } /** @@ -2310,15 +2322,9 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) int ret; if (srpt_set_ch_state(ch, CH_LIVE)) { - struct srpt_recv_ioctx *ioctx, *ioctx_tmp; - ret = srpt_ch_qp_rts(ch, ch->qp); - list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list, - wait_list) { - list_del(&ioctx->wait_list); - srpt_handle_new_iu(ch, ioctx, NULL); - } + srpt_process_wait_list(ch); if (ret) srpt_close_ch(ch); } -- cgit v0.10.2 From 387add460dc2f61567fa127bd76dab8ba6833234 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 11 Feb 2016 11:10:09 -0800 Subject: IB/srpt: Fix wait list processing Since the wait list is not protected against concurrent access it must be processed from the context of the completion handler. Replace the wait list processing code in the IB CM RTU callback handler by code that triggers a completion handler. 
This patch fixes the following rare crash: WARNING: CPU: 2 PID: 78656 at lib/list_debug.c:53 __list_del_entry+0x67/0xd0() list_del corruption, ffff88041ae404b8->next is LIST_POISON1 (dead000000000100) Call Trace: [] dump_stack+0x4f/0x74 [] warn_slowpath_common+0x8b/0xd0 [] warn_slowpath_fmt+0x41/0x70 [] __list_del_entry+0x67/0xd0 [] list_del+0x11/0x40 [] srpt_cm_handler+0x172/0x1a4 [ib_srpt] [] cm_process_work+0x20/0xf0 [ib_cm] [] cm_establish_handler+0xbe/0x110 [ib_cm] [] cm_work_handler+0x67/0xd0 [ib_cm] [] process_one_work+0x1bd/0x460 [] worker_thread+0x118/0x420 [] kthread+0xe4/0x100 [] ret_from_fork+0x3f/0x70 Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Alex Estrin Cc: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 289e058..25bdaee 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -96,7 +96,7 @@ static void srpt_free_ch(struct kref *kref); static int srpt_queue_status(struct se_cmd *cmd); static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); -static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc); +static void srpt_process_wait_list(struct srpt_rdma_ch *ch); /* * The only allowed channel state changes are those that change the channel @@ -833,12 +833,14 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) { struct srpt_rdma_ch *ch = cq->cq_context; - WARN(wc->status == IB_WC_SUCCESS, "%s-%d: QP not in error state\n", - ch->sess_name, ch->qp->qp_num); - if (srpt_set_ch_state(ch, CH_DISCONNECTED)) - schedule_work(&ch->release_work); - else - WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num); + if (wc->status == IB_WC_SUCCESS) { + srpt_process_wait_list(ch); + } else { + if (srpt_set_ch_state(ch, CH_DISCONNECTED)) + schedule_work(&ch->release_work); + else + WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num); + } } /** @@ -2324,9 +2326,13 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) if (srpt_set_ch_state(ch, CH_LIVE)) { ret = srpt_ch_qp_rts(ch, ch->qp); - srpt_process_wait_list(ch); - if (ret) + if (ret == 0) { + /* Trigger wait list processing. */ + ret = srpt_zerolength_write(ch); + WARN_ONCE(ret < 0, "%d\n", ret); + } else { srpt_close_ch(ch); + } } } -- cgit v0.10.2 From 5adebafb75bdfbbe4ec69f14c3613e70f6ed7f6f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 21 Feb 2016 18:12:26 +0200 Subject: IB/core: Fix missed clean call in registration path In case of failure returned from query function in IB device registration, we need to clean IB cache which was missed. This change fixes it. 
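The one-line change below restores the usual pairing of setup and cleanup on the registration error path: every step that succeeded before the failure must be undone before returning. A minimal sketch of the idiom; setup_a(), setup_b(), teardown_a() and struct thing are hypothetical stand-ins for the real steps (cache setup versus the device query):

        static int register_thing(struct thing *t)
        {
                int ret;

                ret = setup_a(t);
                if (ret)
                        return ret;

                ret = setup_b(t);
                if (ret)
                        goto err_undo_a;        /* undo the step that already succeeded */

                return 0;

        err_undo_a:
                teardown_a(t);
                return ret;
        }
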
Fixes: 3e153a93a1c1 ('IB/core: Save the device attributes on the device structure') Signed-off-by: Leon Romanovsky Reviewed-by: Ira Weiny Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 00da80e..94b80a5 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -358,6 +358,7 @@ int ib_register_device(struct ib_device *device, ret = device->query_device(device, &device->attrs, &uhw); if (ret) { printk(KERN_WARNING "Couldn't query the device attributes\n"); + ib_cache_cleanup_one(device); goto out; } -- cgit v0.10.2 From a1edc18a484285fcaf4bb73241f573ccb8f06fbc Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 11 Jan 2016 13:04:32 -0500 Subject: staging/hfi1: add dd_dev_dbg To be used in future patches add dd_dev_dbg. dd_* functions properly decode the hfi1_devdata structure used throughout the driver Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 62157cc..52dcc87 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1804,6 +1804,10 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) dev_info(&(dd)->pcidev->dev, "%s: " fmt, \ get_unit_name((dd)->unit), ##__VA_ARGS__) +#define dd_dev_dbg(dd, fmt, ...) \ + dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \ + get_unit_name((dd)->unit), ##__VA_ARGS__) + #define hfi1_dev_porterr(dd, port, fmt, ...) \ dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ get_unit_name((dd)->unit), (dd)->unit, (port), \ -- cgit v0.10.2 From a06e825a13c363da31936d04645e94b99aeb34e2 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 11 Jan 2016 13:04:33 -0500 Subject: staging/hfi1: set Gen3 half-swing for integrated devices Correctly set half-swing for integrated devices. A0 needs all fields set for CcePcieCtrl. B0 and later only need a few fields set. 
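The programming itself is a read-modify-write of the multi-field CcePcieCtrl CSR; a small self-contained sketch of that generic pattern (the helpers below are illustrative and not part of the patch):

	/* Extract a field: shift it down and mask off the rest. */
	static inline u64 csr_get_field(u64 reg, u64 mask, unsigned int shift)
	{
		return (reg >> shift) & mask;
	}

	/* Replace a field: clear its bits, then insert the new value. */
	static inline u64 csr_set_field(u64 reg, u64 val, u64 mask,
					unsigned int shift)
	{
		reg &= ~(mask << shift);
		return reg | ((val & mask) << shift);
	}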
Reviewed-by: Stuart Summers Signed-off-by: Dean Luick Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h index 701e9e1..014d7a6 100644 --- a/drivers/staging/rdma/hfi1/chip_registers.h +++ b/drivers/staging/rdma/hfi1/chip_registers.h @@ -551,6 +551,17 @@ #define CCE_MSIX_TABLE_UPPER (CCE + 0x000000100008) #define CCE_MSIX_TABLE_UPPER_RESETCSR 0x0000000100000000ull #define CCE_MSIX_VEC_CLR_WITHOUT_INT (CCE + 0x000000110400) +#define CCE_PCIE_CTRL (CCE + 0x0000000000C0) +#define CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_MASK 0x3ull +#define CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_SHIFT 0 +#define CCE_PCIE_CTRL_PCIE_LANE_DELAY_MASK 0xFull +#define CCE_PCIE_CTRL_PCIE_LANE_DELAY_SHIFT 2 +#define CCE_PCIE_CTRL_XMT_MARGIN_OVERWRITE_ENABLE_SHIFT 8 +#define CCE_PCIE_CTRL_XMT_MARGIN_SHIFT 9 +#define CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_MASK 0x1ull +#define CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_SHIFT 12 +#define CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_MASK 0x7ull +#define CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_SHIFT 13 #define CCE_REVISION (CCE + 0x000000000000) #define CCE_REVISION2 (CCE + 0x000000000008) #define CCE_REVISION2_HFI_ID_MASK 0x1ull diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 8317b07..9917faf 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -867,6 +867,83 @@ static void arm_gasket_logic(struct hfi1_devdata *dd) } /* + * CCE_PCIE_CTRL long name helpers + * We redefine these shorter macros to use in the code while leaving + * chip_registers.h to be autogenerated from the hardware spec. + */ +#define LANE_BUNDLE_MASK CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_MASK +#define LANE_BUNDLE_SHIFT CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_SHIFT +#define LANE_DELAY_MASK CCE_PCIE_CTRL_PCIE_LANE_DELAY_MASK +#define LANE_DELAY_SHIFT CCE_PCIE_CTRL_PCIE_LANE_DELAY_SHIFT +#define MARGIN_OVERWRITE_ENABLE_SHIFT CCE_PCIE_CTRL_XMT_MARGIN_OVERWRITE_ENABLE_SHIFT +#define MARGIN_SHIFT CCE_PCIE_CTRL_XMT_MARGIN_SHIFT +#define MARGIN_G1_G2_OVERWRITE_MASK CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_MASK +#define MARGIN_G1_G2_OVERWRITE_SHIFT CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_SHIFT +#define MARGIN_GEN1_GEN2_MASK CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_MASK +#define MARGIN_GEN1_GEN2_SHIFT CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_SHIFT + + /* + * Write xmt_margin for full-swing (WFR-B) or half-swing (WFR-C). + */ +static void write_xmt_margin(struct hfi1_devdata *dd, const char *fname) +{ + u64 pcie_ctrl; + u64 xmt_margin; + u64 xmt_margin_oe; + u64 lane_delay; + u64 lane_bundle; + + pcie_ctrl = read_csr(dd, CCE_PCIE_CTRL); + + /* + * For Discrete, use full-swing. + * - PCIe TX defaults to full-swing. + * Leave this register as default. + * For Integrated, use half-swing + * - Copy xmt_margin and xmt_margin_oe + * from Gen1/Gen2 to Gen3. + */ + if (dd->pcidev->device == PCI_DEVICE_ID_INTEL1) { /* integrated */ + /* extract initial fields */ + xmt_margin = (pcie_ctrl >> MARGIN_GEN1_GEN2_SHIFT) + & MARGIN_GEN1_GEN2_MASK; + xmt_margin_oe = (pcie_ctrl >> MARGIN_G1_G2_OVERWRITE_SHIFT) + & MARGIN_G1_G2_OVERWRITE_MASK; + lane_delay = (pcie_ctrl >> LANE_DELAY_SHIFT) & LANE_DELAY_MASK; + lane_bundle = (pcie_ctrl >> LANE_BUNDLE_SHIFT) + & LANE_BUNDLE_MASK; + + /* + * For A0, EFUSE values are not set. Override with the + * correct values. 
+ */ + if (is_ax(dd)) { + /* + * xmt_margin and OverwiteEnabel should be the + * same for Gen1/Gen2 and Gen3 + */ + xmt_margin = 0x5; + xmt_margin_oe = 0x1; + lane_delay = 0xF; /* Delay 240ns. */ + lane_bundle = 0x0; /* Set to 1 lane. */ + } + + /* overwrite existing values */ + pcie_ctrl = (xmt_margin << MARGIN_GEN1_GEN2_SHIFT) + | (xmt_margin_oe << MARGIN_G1_G2_OVERWRITE_SHIFT) + | (xmt_margin << MARGIN_SHIFT) + | (xmt_margin_oe << MARGIN_OVERWRITE_ENABLE_SHIFT) + | (lane_delay << LANE_DELAY_SHIFT) + | (lane_bundle << LANE_BUNDLE_SHIFT); + + write_csr(dd, CCE_PCIE_CTRL, pcie_ctrl); + } + + dd_dev_dbg(dd, "%s: program XMT margin, CcePcieCtrl 0x%llx\n", + fname, pcie_ctrl); +} + +/* * Do all the steps needed to transition the PCIe link to Gen3 speed. */ int do_pcie_gen3_transition(struct hfi1_devdata *dd) @@ -1064,11 +1141,8 @@ retry: /* * step 5d: program XMT margin - * Right now, leave the default alone. To change, do a - * read-modify-write of: - * CcePcieCtrl.XmtMargin - * CcePcieCtrl.XmitMarginOverwriteEnable */ + write_xmt_margin(dd, __func__); /* step 5e: disable active state power management (ASPM) */ dd_dev_info(dd, "%s: clearing ASPM\n", __func__); -- cgit v0.10.2 From 349ac71ffad79281f3c0dc908cd4b6f7d7dbb477 Mon Sep 17 00:00:00 2001 From: "jubin.john@intel.com" Date: Mon, 11 Jan 2016 18:30:52 -0500 Subject: staging/hfi1: Use BIT macro This patch fixes the checkpatch issue: CHECK: Prefer using the BIT macro Use of BIT macro for HDRQ_INCREMENT in chip.h causes a change in format specifier for error message in init.c in order to avoid a build warning. Reviewed-by: Dean Luick Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 5b375dd..1368a44 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -242,18 +242,18 @@ #define HCMD_SUCCESS 2 /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR - error flags */ -#define SPICO_ROM_FAILED (1 << 0) -#define UNKNOWN_FRAME (1 << 1) -#define TARGET_BER_NOT_MET (1 << 2) -#define FAILED_SERDES_INTERNAL_LOOPBACK (1 << 3) -#define FAILED_SERDES_INIT (1 << 4) -#define FAILED_LNI_POLLING (1 << 5) -#define FAILED_LNI_DEBOUNCE (1 << 6) -#define FAILED_LNI_ESTBCOMM (1 << 7) -#define FAILED_LNI_OPTEQ (1 << 8) -#define FAILED_LNI_VERIFY_CAP1 (1 << 9) -#define FAILED_LNI_VERIFY_CAP2 (1 << 10) -#define FAILED_LNI_CONFIGLT (1 << 11) +#define SPICO_ROM_FAILED BIT(0) +#define UNKNOWN_FRAME BIT(1) +#define TARGET_BER_NOT_MET BIT(2) +#define FAILED_SERDES_INTERNAL_LOOPBACK BIT(3) +#define FAILED_SERDES_INIT BIT(4) +#define FAILED_LNI_POLLING BIT(5) +#define FAILED_LNI_DEBOUNCE BIT(6) +#define FAILED_LNI_ESTBCOMM BIT(7) +#define FAILED_LNI_OPTEQ BIT(8) +#define FAILED_LNI_VERIFY_CAP1 BIT(9) +#define FAILED_LNI_VERIFY_CAP2 BIT(10) +#define FAILED_LNI_CONFIGLT BIT(11) #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \ | FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \ @@ -262,16 +262,16 @@ | FAILED_LNI_CONFIGLT) /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */ -#define HOST_REQ_DONE (1 << 0) -#define BC_PWR_MGM_MSG (1 << 1) -#define BC_SMA_MSG (1 << 2) -#define BC_BCC_UNKOWN_MSG (1 << 3) -#define BC_IDLE_UNKNOWN_MSG (1 << 4) -#define EXT_DEVICE_CFG_REQ (1 << 5) -#define VERIFY_CAP_FRAME (1 << 6) -#define LINKUP_ACHIEVED (1 << 7) -#define LINK_GOING_DOWN (1 << 8) -#define LINK_WIDTH_DOWNGRADED (1 << 9) +#define HOST_REQ_DONE BIT(0) +#define BC_PWR_MGM_MSG BIT(1) +#define 
BC_SMA_MSG BIT(2) +#define BC_BCC_UNKNOWN_MSG BIT(3) +#define BC_IDLE_UNKNOWN_MSG BIT(4) +#define EXT_DEVICE_CFG_REQ BIT(5) +#define VERIFY_CAP_FRAME BIT(6) +#define LINKUP_ACHIEVED BIT(7) +#define LINK_GOING_DOWN BIT(8) +#define LINK_WIDTH_DOWNGRADED BIT(9) /* DC_DC8051_CFG_EXT_DEV_1.REQ_TYPE - 8051 host requests */ #define HREQ_LOAD_CONFIG 0x01 @@ -335,14 +335,14 @@ * the CSR fields hold multiples of this value. */ #define RCV_SHIFT 3 -#define RCV_INCREMENT (1 << RCV_SHIFT) +#define RCV_INCREMENT BIT(RCV_SHIFT) /* * Receive header queue entry increment - the CSR holds multiples of * this value. */ #define HDRQ_SIZE_SHIFT 5 -#define HDRQ_INCREMENT (1 << HDRQ_SIZE_SHIFT) +#define HDRQ_INCREMENT BIT(HDRQ_SIZE_SHIFT) /* * Freeze handling flags diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h index 5dd9272..e4b1dc6 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/staging/rdma/hfi1/common.h @@ -349,10 +349,10 @@ struct hfi1_message_header { #define HFI1_QPN_MASK 0xFFFFFF #define HFI1_FECN_SHIFT 31 #define HFI1_FECN_MASK 1 -#define HFI1_FECN_SMASK (1 << HFI1_FECN_SHIFT) +#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT) #define HFI1_BECN_SHIFT 30 #define HFI1_BECN_MASK 1 -#define HFI1_BECN_SMASK (1 << HFI1_BECN_SHIFT) +#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) #define HFI1_MULTICAST_LID_BASE 0xC000 static inline __u64 rhf_to_cpu(const __le32 *rbuf) diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 52dcc87..55202c7 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -424,17 +424,17 @@ struct hfi1_sge_state; #define __HLS_GOING_OFFLINE_BP 9 #define __HLS_LINK_COOLDOWN_BP 10 -#define HLS_UP_INIT (1 << __HLS_UP_INIT_BP) -#define HLS_UP_ARMED (1 << __HLS_UP_ARMED_BP) -#define HLS_UP_ACTIVE (1 << __HLS_UP_ACTIVE_BP) -#define HLS_DN_DOWNDEF (1 << __HLS_DN_DOWNDEF_BP) /* link down default */ -#define HLS_DN_POLL (1 << __HLS_DN_POLL_BP) -#define HLS_DN_DISABLE (1 << __HLS_DN_DISABLE_BP) -#define HLS_DN_OFFLINE (1 << __HLS_DN_OFFLINE_BP) -#define HLS_VERIFY_CAP (1 << __HLS_VERIFY_CAP_BP) -#define HLS_GOING_UP (1 << __HLS_GOING_UP_BP) -#define HLS_GOING_OFFLINE (1 << __HLS_GOING_OFFLINE_BP) -#define HLS_LINK_COOLDOWN (1 << __HLS_LINK_COOLDOWN_BP) +#define HLS_UP_INIT BIT(__HLS_UP_INIT_BP) +#define HLS_UP_ARMED BIT(__HLS_UP_ARMED_BP) +#define HLS_UP_ACTIVE BIT(__HLS_UP_ACTIVE_BP) +#define HLS_DN_DOWNDEF BIT(__HLS_DN_DOWNDEF_BP) /* link down default */ +#define HLS_DN_POLL BIT(__HLS_DN_POLL_BP) +#define HLS_DN_DISABLE BIT(__HLS_DN_DISABLE_BP) +#define HLS_DN_OFFLINE BIT(__HLS_DN_OFFLINE_BP) +#define HLS_VERIFY_CAP BIT(__HLS_VERIFY_CAP_BP) +#define HLS_GOING_UP BIT(__HLS_GOING_UP_BP) +#define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP) +#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP) #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 00f52e8..aa46923 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -260,7 +260,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt) /* Validate and initialize Rcv Hdr Q variables */ if (rcvhdrcnt % HDRQ_INCREMENT) { dd_dev_err(dd, - "ctxt%u: header queue count %d must be divisible by %d\n", + "ctxt%u: header queue count %d must be divisible by %lu\n", rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT); goto bail; } diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 
4f5dbd1..eeb8687 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -1782,7 +1782,7 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, u32 len = OPA_AM_CI_LEN(am) + 1; int ret; -#define __CI_PAGE_SIZE (1 << 7) /* 128 bytes */ +#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */ #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1) #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK) @@ -3402,7 +3402,7 @@ struct opa_led_info { }; #define OPA_LED_SHIFT 31 -#define OPA_LED_MASK (1 << OPA_LED_SHIFT) +#define OPA_LED_MASK BIT(OPA_LED_SHIFT) static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 62a94c5..19b16a9 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -54,7 +54,7 @@ #include "verbs.h" #include "sdma.h" -#define QPN_MAX (1 << 24) +#define QPN_MAX BIT(24) #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) /* diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h index d30c2a6..16aebdc 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/staging/rdma/hfi1/qsfp.h @@ -59,11 +59,11 @@ * Below are masks for QSFP pins. Pins are the same for HFI0 and HFI1. * _N means asserted low */ -#define QSFP_HFI0_I2CCLK (1 << 0) -#define QSFP_HFI0_I2CDAT (1 << 1) -#define QSFP_HFI0_RESET_N (1 << 2) -#define QSFP_HFI0_INT_N (1 << 3) -#define QSFP_HFI0_MODPRST_N (1 << 4) +#define QSFP_HFI0_I2CCLK BIT(0) +#define QSFP_HFI0_I2CDAT BIT(1) +#define QSFP_HFI0_RESET_N BIT(2) +#define QSFP_HFI0_INT_N BIT(3) +#define QSFP_HFI0_MODPRST_N BIT(4) /* QSFP is paged at 256 bytes */ #define QSFP_PAGESIZE 256 diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 9a15f1f..1d38be5 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -112,10 +112,10 @@ MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptor before interrupt"); | SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK)) /* sdma_sendctrl operations */ -#define SDMA_SENDCTRL_OP_ENABLE (1U << 0) -#define SDMA_SENDCTRL_OP_INTENABLE (1U << 1) -#define SDMA_SENDCTRL_OP_HALT (1U << 2) -#define SDMA_SENDCTRL_OP_CLEANUP (1U << 3) +#define SDMA_SENDCTRL_OP_ENABLE BIT(0) +#define SDMA_SENDCTRL_OP_INTENABLE BIT(1) +#define SDMA_SENDCTRL_OP_HALT BIT(2) +#define SDMA_SENDCTRL_OP_CLEANUP BIT(3) /* handle long defines */ #define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \ -- cgit v0.10.2 From 9eb0432baa150c31222be71f435091ea562f93cf Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 11 Jan 2016 18:30:53 -0500 Subject: staging/hfi1: Move s_sde to read mostly section of hfi1_qp This would reduce L2 cache misses on s_sde in the _hfi1_schedule_send function when invoked from post_send thereby improving performance of post_send. 
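A hedged sketch of the general layout idea (an illustrative structure, not the real hfi1_qp definition): fields that are read on every post_send but rarely written are grouped together, away from frequently written state, so the send path touches fewer dirty cache lines.

	struct example_qp {
		/* read-mostly on the hot send path */
		struct sdma_engine *s_sde;
		u8 s_sc;

		/* frequently written state, kept on its own cache line */
		u32 s_cur_size ____cacheline_aligned;
		u32 s_len;
	};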
Reviewed-by: Mike Marciniszyn Signed-off-by: Harish Chegondi Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 286e468..a163fc2 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -441,6 +441,7 @@ struct hfi1_qp { struct hfi1_swqe *s_wq; /* send work queue */ struct hfi1_mmap_info *ip; struct ahg_ib_header *s_hdr; /* next packet header to send */ + struct sdma_engine *s_sde; /* current sde */ /* sc for UC/RC QPs - based on ah for UD */ u8 s_sc; unsigned long timeout_jiffies; /* computed from timeout */ @@ -506,7 +507,6 @@ struct hfi1_qp { struct hfi1_swqe *s_wqe; struct hfi1_sge_state s_sge; /* current send request data */ struct hfi1_mregion *s_rdma_mr; - struct sdma_engine *s_sde; /* current sde */ u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ -- cgit v0.10.2 From 5b55ea3b6eb8abe30aea4ae1604a6f067bd5e010 Mon Sep 17 00:00:00 2001 From: "Mark F. Brown" Date: Mon, 11 Jan 2016 18:30:54 -0500 Subject: staging/hfi1: change krcvqs mod param from byte to uint The krcvqs parameter is displayed incorrectly in sysfs. The workaround is to set the param type as uint. Reviewed-by: Mike Marciniszyn Reviewed-by: Mitko Haralanov Signed-off-by: Mark F. Brown Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 55202c7..b33bcca 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1677,7 +1677,7 @@ extern unsigned int hfi1_cu; extern unsigned int user_credit_return_threshold; extern int num_user_contexts; extern unsigned n_krcvqs; -extern u8 krcvqs[]; +extern uint krcvqs[]; extern int krcvqsset; extern uint kdeth_qp; extern uint loopback; diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index aa46923..48269a2 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -87,9 +87,9 @@ module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO); MODULE_PARM_DESC( num_user_contexts, "Set max number of user contexts to use"); -u8 krcvqs[RXE_NUM_DATA_VL]; +uint krcvqs[RXE_NUM_DATA_VL]; int krcvqsset; -module_param_array(krcvqs, byte, &krcvqsset, S_IRUGO); +module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO); MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL"); /* computed based on above array */ -- cgit v0.10.2 From 0edf80eae01b7f211a1142856c6c8fc41ea3ce06 Mon Sep 17 00:00:00 2001 From: "jubin.john@intel.com" Date: Mon, 11 Jan 2016 18:30:55 -0500 Subject: staging/hfi1: Change default krcvqs Change the default number of krcvqs to number of numa nodes + 1 based on the performance data collected. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index bbe5ad8..503bfca 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -12445,7 +12445,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1; else - num_kernel_contexts = num_online_nodes(); + num_kernel_contexts = num_online_nodes() + 1; num_kernel_contexts = max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts); /* -- cgit v0.10.2 From b54ba2772b7af82a07eb48f88c88f7cadfb33401 Mon Sep 17 00:00:00 2001 From: Meny Yossefi Date: Thu, 18 Feb 2016 18:14:59 +0200 Subject: net/mlx5_core: Add helper function to read virtual port counters Added helper function to read 64bit virtual port Infiniband traffic counters. Signed-off-by: Meny Yossefi Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index c7398b9..90ab09e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -850,3 +850,43 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); } EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); + +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, void *out, size_t out_sz) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); + int is_group_manager; + void *in; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = mlx5_vzalloc(in_sz); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, vport_number, 0); + } else { + err = -EPERM; + goto free; + } + } + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_vport_counter_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto free; + err = mlx5_cmd_status_to_err_v2(out); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 51f1e54..0732e6c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3126,7 +3126,8 @@ struct mlx5_ifc_query_vport_counter_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_at_41[0xf]; + u8 reserved_at_41[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_at_60[0x60]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 1237710..a9f2bcc 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -92,5 +92,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, void *out, size_t out_sz); #endif /* __MLX5_VPORT_H__ */ -- cgit v0.10.2 From 1c64bf6f291cae7cbe779e407db9477378bb4e7d Mon Sep 17 00:00:00 2001 From: Meny Yossefi Date: Thu, 18 Feb 2016 18:15:00 +0200 Subject: net/mlx5_core: Add helper function to read IB error counters Added 
helper function to read IB standard error counters via the PPCNT register. The PPCNT register read command provides the 32-bit error counters of both IB/RoCE link layer and transport layer. Signed-off-by: Meny Yossefi Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index a87e773..5635ce7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -324,6 +324,29 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, + u8 port_num, void *out, size_t sz) +{ + u32 *in; + int err; + + in = mlx5_vzalloc(sz); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(ppcnt_reg, in, local_port, port_num); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); + err = mlx5_core_access_reg(dev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); + int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) { u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 987764a..99e2edc 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1284,7 +1284,8 @@ enum { MLX5_RFC_3635_COUNTERS_GROUP = 0x3, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, - MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11 + MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, + MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1e3006d..8edcd08 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -847,6 +847,8 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, + u8 port_num, void *out, size_t sz); static inline int fw_initializing(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0732e6c..9f404de 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1208,6 +1208,36 @@ struct mlx5_ifc_phys_layer_cntrs_bits { u8 reserved_at_640[0x180]; }; +struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { + u8 symbol_error_counter[0x10]; + + u8 link_error_recovery_counter[0x8]; + + u8 link_downed_counter[0x8]; + + u8 port_rcv_errors[0x10]; + + u8 port_rcv_remote_physical_errors[0x10]; + + u8 port_rcv_switch_relay_errors[0x10]; + + u8 port_xmit_discards[0x10]; + + u8 port_xmit_constraint_errors[0x8]; + + u8 port_rcv_constraint_errors[0x8]; + + u8 reserved_at_70[0x8]; + + u8 link_overrun_errors[0x8]; + + u8 reserved_at_80[0x10]; + + u8 vl_15_dropped[0x10]; + + u8 reserved_at_a0[0xa0]; +}; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { u8 transmit_queue_high[0x20]; @@ -2618,6 +2648,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits 
eth_per_traffic_grp_data_layout; + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; u8 reserved_at_0[0x7c0]; }; @@ -6955,6 +6986,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_peir_reg_bits peir_reg; struct mlx5_ifc_pelc_reg_bits pelc_reg; struct mlx5_ifc_pfcc_reg_bits pfcc_reg; + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_pifr_reg_bits pifr_reg; struct mlx5_ifc_pipg_reg_bits pipg_reg; -- cgit v0.10.2 From 3efd9a11212d500e36c2837db853178cdaa86d5a Mon Sep 17 00:00:00 2001 From: Meny Yossefi Date: Thu, 18 Feb 2016 18:15:01 +0200 Subject: IB/mlx5: Modify MAD reading counters method to use counter registers Modify mlx5_ib_process_mad to use PPCNT and query_vport commands instead of MAD_IFC, as MAD_IFC is deprecated on new firmware versions (and doesn't support RoCE anyway). Traffic counters exist in both 32-bit and 64-bit forms. Declaring support of extended coutners results in traffic counters to be read in their 64-bit form only via the query_vport command. Error counters exist only in 32-bit form and read via PPCNT command. This commit also adds counters support in RoCE. Signed-off-by: Meny Yossefi Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index b84d13a..41d8a00 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -31,8 +31,10 @@ */ #include +#include #include #include +#include #include "mlx5_ib.h" enum { @@ -57,20 +59,12 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); } -int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { u16 slid; int err; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; slid = in_wc ? 
in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); @@ -117,6 +111,156 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; }
+static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
+ void *out)
+{
+#define MLX5_SUM_CNT(p, cntr1, cntr2) \
+ (MLX5_GET64(query_vport_counter_out, p, cntr1) + \
+ MLX5_GET64(query_vport_counter_out, p, cntr2))
+
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
+ transmitted_ib_multicast.octets) >> 2);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
+ received_ib_multicast.octets) >> 2);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets,
+ transmitted_ib_multicast.packets));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets,
+ received_ib_multicast.packets));
+ pma_cnt_ext->port_unicast_xmit_packets =
+ MLX5_GET64_BE(query_vport_counter_out,
+ out, transmitted_ib_unicast.packets);
+ pma_cnt_ext->port_unicast_rcv_packets =
+ MLX5_GET64_BE(query_vport_counter_out,
+ out, received_ib_unicast.packets);
+ pma_cnt_ext->port_multicast_xmit_packets =
+ MLX5_GET64_BE(query_vport_counter_out,
+ out, transmitted_ib_multicast.packets);
+ pma_cnt_ext->port_multicast_rcv_packets =
+ MLX5_GET64_BE(query_vport_counter_out,
+ out, received_ib_multicast.packets);
+}
+
+static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
+ void *out)
+{
+ /* Traffic counters will be reported in
+ * their 64bit form via ib_pma_portcounters_ext by default.
+ */
+ void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out,
+ counter_set);
+
+#define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name) { \
+ counter_var = MLX5_GET_BE(typeof(counter_var), \
+ ib_port_cntrs_grp_data_layout, \
+ out_pma, counter_name); \
+ }
+
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter,
+ symbol_error_counter);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter,
+ link_error_recovery_counter);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter,
+ link_downed_counter);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors,
+ port_rcv_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors,
+ port_rcv_remote_physical_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors,
+ port_rcv_switch_relay_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards,
+ port_xmit_discards);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors,
+ port_xmit_constraint_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors,
+ port_rcv_constraint_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors,
+ link_overrun_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped,
+ vl_15_dropped);
+}
+
+static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ int err;
+ void *out_cnt;
+
+ /* Declaring support of extended counters */
+ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
+ struct ib_class_port_info cpi = {};
+
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ }
+
+ if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
+ struct ib_pma_portcounters_ext *pma_cnt_ext =
+ (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+
out_cnt = mlx5_vzalloc(sz); + if (!out_cnt) + return IB_MAD_RESULT_FAILURE; + + err = mlx5_core_query_vport_counter(dev->mdev, 0, + port_num, out_cnt, sz); + if (!err) + pma_cnt_ext_assign(pma_cnt_ext, out_cnt); + } else { + struct ib_pma_portcounters *pma_cnt = + (struct ib_pma_portcounters *)(out_mad->data + 40); + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + out_cnt = mlx5_vzalloc(sz); + if (!out_cnt) + return IB_MAD_RESULT_FAILURE; + + err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num, + out_cnt, sz); + if (!err) + pma_cnt_assign(pma_cnt, out_cnt); + } + + kvfree(out_cnt); + if (err) + return IB_MAD_RESULT_FAILURE; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; + + memset(out_mad->data, 0, sizeof(out_mad->data)); + + if (MLX5_CAP_GEN(mdev, vport_counters) && + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { + return process_pma_cmd(ibdev, port_num, in_mad, out_mad); + } else { + return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in_mad, out_mad); + } +} + int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_smp *in_mad = NULL; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 99e2edc..12079fd 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -105,6 +105,29 @@ __mlx5_mask(typ, fld)) ___t; \ }) +/* Big endian getters */ +#define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ + __mlx5_64_off(typ, fld))) + +#define MLX5_GET_BE(type_t, typ, p, fld) ({ \ + type_t tmp; \ + switch (sizeof(tmp)) { \ + case sizeof(u8): \ + tmp = (__force type_t)MLX5_GET(typ, p, fld); \ + break; \ + case sizeof(u16): \ + tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \ + break; \ + case sizeof(u32): \ + tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \ + break; \ + case sizeof(u64): \ + tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \ + break; \ + } \ + tmp; \ + }) + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, -- cgit v0.10.2 From 1015c2e8ca2b94d8964f8ab30d925b6f678fd9d2 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 21 Feb 2016 16:27:16 +0200 Subject: IB/mlx5: Define interface bits for IPoIB offloads The HW can supply several offloads for UD QP, added offloads for checksumming for both TX and RX and LSO for TX. Two new bits were added in order to expose and enable these offloads: 1. HCA capability bit: declares the support for IPoIB basic offloads. 2. QPC bit which will be used in the QP creation flow, which set these abilities in the QP. 
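As an illustration of how a ULP could consume the offloads once the follow-on patches translate these bits into verbs capabilities (a hedged sketch, not code from this series): check the advertised device capability before requesting an LSO-capable UD QP.

	static struct ib_qp *create_ud_qp_sketch(struct ib_device *device,
						 struct ib_pd *pd,
						 struct ib_qp_init_attr *attr)
	{
		attr->qp_type = IB_QPT_UD;
		/* only ask for LSO when the device advertises UD TSO support */
		if (device->attrs.device_cap_flags & IB_DEVICE_UD_TSO)
			attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
		return ib_create_qp(pd, attr);
	}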
Signed-off-by: Erez Shitrit Signed-off-by: Eran Ben Elisha Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9f404de..711c9dc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -736,7 +736,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; - u8 reserved_at_200[0xe]; + u8 reserved_at_200[0x3]; + u8 ipoib_basic_offloads[0x1]; + u8 reserved_at_204[0xa]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; @@ -1810,7 +1812,7 @@ struct mlx5_ifc_qpc_bits { u8 log_sq_size[0x4]; u8 reserved_at_55[0x6]; u8 rlky[0x1]; - u8 reserved_at_5c[0x4]; + u8 ulp_stateless_offload_mode[0x4]; u8 counter_set_id[0x8]; u8 uar_page[0x18]; -- cgit v0.10.2 From f031396531fe2b1a6ffb4fa5eceb9c1fa276869a Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 21 Feb 2016 16:27:17 +0200 Subject: IB/mlx5: Implement UD QP offloads for IPoIB in the TX flow In order to support LSO and CSUM in the TX flow the driver does the following: * LSO bit for the enum mlx5_ib_qp_flags was added, indicates QP that supports LSO offloads. * Enables the special offload when the QP is created, and enable the special work request id (IB_WR_LSO) when comes. * Calculates the size of the WQE according to the new WQE format that support these offloads. * Handles the new WQE format when arrived, sets the relevant fields, and copies the needed data. Signed-off-by: Erez Shitrit Signed-off-by: Eran Ben Elisha Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 03c418c..76b0939 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -504,6 +504,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, (MLX5_CAP_ETH(dev->mdev, csum_cap))) props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + props->device_cap_flags |= IB_DEVICE_UD_TSO; + } + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d2b9737..14396b0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -325,11 +325,12 @@ struct mlx5_ib_cq_buf { }; enum mlx5_ib_qp_flags { - MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, - MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, - MLX5_IB_QP_CROSS_CHANNEL = 1 << 2, - MLX5_IB_QP_MANAGED_SEND = 1 << 3, - MLX5_IB_QP_MANAGED_RECV = 1 << 4, + MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, + MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, + MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, + MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, + MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 34cb8e8..baa8808 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -58,6 +58,7 @@ enum { static const u32 mlx5_ib_opcode[] = { [IB_WR_SEND] = MLX5_OPCODE_SEND, + [IB_WR_LSO] = MLX5_OPCODE_LSO, [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, @@ -72,6 +73,9 @@ static const u32 mlx5_ib_opcode[] = { 
[MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, }; +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; static int is_qp0(enum ib_qp_type qp_type) { @@ -260,11 +264,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, return 0; } -static int sq_overhead(enum ib_qp_type qp_type) +static int sq_overhead(struct ib_qp_init_attr *attr) { int size = 0; - switch (qp_type) { + switch (attr->qp_type) { case IB_QPT_XRC_INI: size += sizeof(struct mlx5_wqe_xrc_seg); /* fall through */ @@ -287,6 +291,10 @@ static int sq_overhead(enum ib_qp_type qp_type) break; case IB_QPT_UD: + if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) + size += sizeof(struct mlx5_wqe_eth_pad) + + sizeof(struct mlx5_wqe_eth_seg); + /* fall through */ case IB_QPT_SMI: case IB_QPT_GSI: size += sizeof(struct mlx5_wqe_ctrl_seg) + @@ -311,7 +319,7 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) int inl_size = 0; int size; - size = sq_overhead(attr->qp_type); + size = sq_overhead(attr); if (size < 0) return size; @@ -348,8 +356,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, return -EINVAL; } - qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - - sizeof(struct mlx5_wqe_inline_seg); + qp->max_inline_data = wqe_size - sq_overhead(attr) - + sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) @@ -783,7 +791,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev->priv.uuari; - if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | + IB_QP_CREATE_IPOIB_UD_LSO)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -1228,6 +1238,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) qp->flags |= MLX5_IB_QP_MANAGED_RECV; } + + if (init_attr->qp_type == IB_QPT_UD && + (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) + if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { + mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n"); + return -EOPNOTSUPP; + } + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; @@ -1385,6 +1403,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, /* 0xffffff means we ask to work with cqe version 0 */ MLX5_SET(qpc, qpc, user_index, uidx); } + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ + if (init_attr->qp_type == IB_QPT_UD && + (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); + qp->flags |= MLX5_IB_QP_LSO; + } if (init_attr->qp_type == IB_QPT_RAW_PACKET) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; @@ -2442,6 +2467,59 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } +static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, + struct ib_send_wr *wr, void *qend, + struct mlx5_ib_qp *qp, int *size) +{ + void *seg = eseg; + + memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); + + if (wr->send_flags & IB_SEND_IP_CSUM) + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | + MLX5_ETH_WQE_L4_CSUM; + + seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; + + if (wr->opcode == IB_WR_LSO) { + struct ib_ud_wr *ud_wr = container_of(wr, struct 
ib_ud_wr, wr); + int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start); + u64 left, leftlen, copysz; + void *pdata = ud_wr->header; + + left = ud_wr->hlen; + eseg->mss = cpu_to_be16(ud_wr->mss); + eseg->inline_hdr_sz = cpu_to_be16(left); + + /* + * check if there is space till the end of queue, if yes, + * copy all in one shot, otherwise copy till the end of queue, + * rollback and than the copy the left + */ + leftlen = qend - (void *)eseg->inline_hdr_start; + copysz = min_t(u64, leftlen, left); + + memcpy(seg - size_of_inl_hdr_start, pdata, copysz); + + if (likely(copysz > size_of_inl_hdr_start)) { + seg += ALIGN(copysz - size_of_inl_hdr_start, 16); + *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; + } + + if (unlikely(copysz < left)) { /* the last wqe in the queue */ + seg = mlx5_get_send_wqe(qp, 0); + left -= copysz; + pdata += copysz; + memcpy(seg, pdata, left); + seg += ALIGN(left, 16); + *size += ALIGN(left, 16) / 16; + } + } + + return seg; +} + static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { @@ -3373,7 +3451,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } break; - case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: set_datagram_seg(seg, wr); @@ -3382,7 +3459,29 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); break; + case IB_QPT_UD: + set_datagram_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_datagram_seg); + size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + + /* handle qp that supports ud offload */ + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { + struct mlx5_wqe_eth_pad *pad; + + pad = seg; + memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); + seg += sizeof(struct mlx5_wqe_eth_pad); + size += sizeof(struct mlx5_wqe_eth_pad) / 16; + + seg = set_eth_seg(seg, wr, qend, qp, &size); + + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + } + break; case MLX5_IB_QPT_REG_UMR: if (wr->opcode != MLX5_IB_WR_UMR) { err = -EINVAL; -- cgit v0.10.2 From c7ce833b364bc19ef51b3c973c94a863e4af0e06 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 21 Feb 2016 16:27:18 +0200 Subject: IB/mlx5: Add support for CSUM in RX flow The driver checks the csum from the HW when completion arrived and marks it in the wc->wc_flags field for the ulp drivers. These is for packets from type IB_WC_RECV only. Signed-off-by: Erez Shitrit Signed-off-by: Eran Ben Elisha Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fd1de31..5ece9a8 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -207,7 +207,10 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, break; case MLX5_CQE_RESP_SEND: wc->opcode = IB_WC_RECV; - wc->wc_flags = 0; + wc->wc_flags = IB_WC_IP_CSUM_OK; + if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK)))) + wc->wc_flags = 0; break; case MLX5_CQE_RESP_SEND_IMM: wc->opcode = IB_WC_RECV; -- cgit v0.10.2 From b11a4f9cde1c06e0073662882b60c1fb95a1d597 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:03 +0200 Subject: IB/mlx5: Add support for setting source QP number In order to create multiple GSI QPs, we need to set the source QP number to one on all these QPs. Add the necessary definitions and infrastructure to do that. 
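A hedged sketch of how the new capability could gate the flag inside the driver (the helper name is illustrative, not part of the patch):

	static void request_sqpn_qp1_sketch(struct mlx5_ib_dev *dev,
					    struct ib_qp_init_attr *init_attr)
	{
		/* only request DETH SQPN spoofing when the HCA reports support */
		if (MLX5_CAP_GEN(dev->mdev, set_deth_sqpn))
			init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
	}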
Reviewed-by: Leon Romanovsky Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 14396b0..32699f9 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -165,6 +165,18 @@ struct mlx5_ib_flow_db { #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 +/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. + * + * These flags are intended for internal use by the mlx5_ib driver, and they + * rely on the range reserved for that use in the ib_qp_create_flags enum. + */ + +/* Create a UD QP whose source QP number is 1 */ +static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) +{ + return IB_QP_CREATE_RESERVED_START; +} + struct wr_list { u16 opcode; u16 next; @@ -331,6 +343,8 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, + /* QP uses 1 as its source QP number */ + MLX5_IB_QP_SQPN_QP1 = 1 << 6, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index baa8808..794e760 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -793,7 +793,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, uuari = &dev->mdev->priv.uuari; if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | - IB_QP_CREATE_IPOIB_UD_LSO)) + IB_QP_CREATE_IPOIB_UD_LSO | + mlx5_ib_create_qp_sqpn_qp1())) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -838,6 +839,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, (*in)->ctx.params1 |= cpu_to_be32(1 << 11); (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + (*in)->ctx.deth_sqpn = cpu_to_be32(1); + qp->flags |= MLX5_IB_QP_SQPN_QP1; + } + mlx5_fill_page_array(&qp->buf, (*in)->pas); err = mlx5_db_alloc(dev->mdev, &qp->db); @@ -1289,6 +1295,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, ucmd.sq_wqe_count, max_wqes); return -EINVAL; } + if (init_attr->create_flags & + mlx5_ib_create_qp_sqpn_qp1()) { + mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); + return -EINVAL; + } err = create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, &inlen, base); if (err) @@ -2309,6 +2320,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); + if (qp->flags & MLX5_IB_QP_SQPN_QP1) + context->deth_sqpn = cpu_to_be32(1); mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); @@ -3973,6 +3986,8 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; + if (qp->flags & MLX5_IB_QP_SQPN_QP1) + qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 711c9dc..72bba52 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -772,7 +772,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_22e[0x7]; u8 qkv[0x1]; u8 pkv[0x1]; - u8 reserved_at_237[0x4]; + u8 set_deth_sqpn[0x1]; + u8 reserved_at_239[0x3]; u8 xrc[0x1]; u8 ud[0x1]; u8 uc[0x1]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 5b8c89f..e5bbcf0 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -499,7 +499,8 @@ struct mlx5_qp_context { u8 reserved2[4]; __be32 next_send_psn; __be32 cqn_send; - u8 reserved3[8]; + __be32 deth_sqpn; + u8 reserved3[4]; __be32 last_acked_psn; __be32 ssn; __be32 params2; -- cgit v0.10.2 From 158abf862a2947bfac250a10e79ac20f5e6fea6c Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:04 +0200 Subject: IB/mlx5: Modify QP debugging prints Add debugging prints to the modify QP verb to help understand the cause a returned error. Reviewed-by: Leon Romanovsky Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 794e760..c8b12f9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2197,8 +2197,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context = &in->ctx; err = to_mlx5_st(ibqp->qp_type); - if (err < 0) + if (err < 0) { + mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); goto out; + } context->flags = cpu_to_be32(err << 16); @@ -2418,30 +2420,45 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, - ll)) + ll)) { + mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", + cur_state, new_state, ibqp->qp_type, attr_mask); goto out; + } if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || - attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) + attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) { + mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n", + attr->port_num, dev->num_ports); goto out; + } if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= - dev->mdev->port_caps[port - 1].pkey_table_len) + dev->mdev->port_caps[port - 1].pkey_table_len) { + mlx5_ib_dbg(dev, "invalid pkey index %d\n", + attr->pkey_index); goto out; + } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { + mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", + attr->max_rd_atomic); goto out; + } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { + mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", + attr->max_dest_rd_atomic); goto out; + } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; -- cgit v0.10.2 From d16e91daf446c605a92112889552f9df757186bc Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:05 +0200 Subject: IB/mlx5: Add GSI QP wrapper mlx5 creates special GSI QPs that has limited ability to control the P_Key of transmitted packets. 
The sent P_Key is taken from the QP object, similarly to what happens with regular UD QPs. Create a software wrapper around GSI QPs that with the following patches will be able to emulate the functionality of a GSI QP including control of the P_Key per work request. Reviewed-by: Leon Romanovsky Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 27a7015..4e85188 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c new file mode 100644 index 0000000..7116554 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "mlx5_ib.h" + +struct mlx5_ib_gsi_qp { + struct ib_qp ibqp; + struct ib_qp *rx_qp; + u8 port_num; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + /* Serialize qp state modifications */ + struct mutex mutex; +}; + +static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) +{ + return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); +} + +struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_gsi_qp *gsi; + struct ib_qp_init_attr hw_init_attr = *init_attr; + const u8 port_num = init_attr->port_num; + int ret; + + mlx5_ib_dbg(dev, "creating GSI QP\n"); + + if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) { + mlx5_ib_warn(dev, + "invalid port number %d during GSI QP creation\n", + port_num); + return ERR_PTR(-EINVAL); + } + + gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); + if (!gsi) + return ERR_PTR(-ENOMEM); + + mutex_init(&gsi->mutex); + + mutex_lock(&dev->devr.mutex); + + if (dev->devr.ports[port_num - 1].gsi) { + mlx5_ib_warn(dev, "GSI QP already exists on port %d\n", + port_num); + ret = -EBUSY; + goto err_free; + } + + gsi->cap = init_attr->cap; + gsi->sq_sig_type = init_attr->sq_sig_type; + gsi->ibqp.qp_num = 1; + gsi->port_num = port_num; + + hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI; + gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); + if (IS_ERR(gsi->rx_qp)) { + mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", + PTR_ERR(gsi->rx_qp)); + ret = PTR_ERR(gsi->rx_qp); + goto err_free; + } + + dev->devr.ports[init_attr->port_num - 1].gsi = gsi; + + mutex_unlock(&dev->devr.mutex); + + return &gsi->ibqp; + +err_free: + mutex_unlock(&dev->devr.mutex); + kfree(gsi); + return ERR_PTR(ret); +} + +int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + const int port_num = gsi->port_num; + int ret; + + mlx5_ib_dbg(dev, "destroying GSI QP\n"); + + mutex_lock(&dev->devr.mutex); + ret = ib_destroy_qp(gsi->rx_qp); + if (ret) { + mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. 
error %d\n", + ret); + mutex_unlock(&dev->devr.mutex); + return ret; + } + dev->devr.ports[port_num - 1].gsi = NULL; + mutex_unlock(&dev->devr.mutex); + + kfree(gsi); + + return 0; +} + +int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + int ret; + + mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state); + + mutex_lock(&gsi->mutex); + ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); + mutex_unlock(&gsi->mutex); + + return ret; +} + +int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + int ret; + + mutex_lock(&gsi->mutex); + ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr); + qp_init_attr->cap = gsi->cap; + mutex_unlock(&gsi->mutex); + + return ret; +} + +int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + + return ib_post_send(gsi->rx_qp, wr, bad_wr); +} + +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + + return ib_post_recv(gsi->rx_qp, wr, bad_wr); +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 76b0939..0b30dc5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1970,6 +1970,8 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) dev = container_of(devr, struct mlx5_ib_dev, devr); + mutex_init(&devr->mutex); + devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->p0)) { ret = PTR_ERR(devr->p0); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 32699f9..c68a913 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -163,6 +163,11 @@ struct mlx5_ib_flow_db { #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) #define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 +/* + * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI + * creates the actual hardware QP. + */ +#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. 
@@ -502,6 +507,12 @@ struct mlx5_mr_cache { unsigned long last_add; }; +struct mlx5_ib_gsi_qp; + +struct mlx5_ib_port_resources { + struct mlx5_ib_gsi_qp *gsi; +}; + struct mlx5_ib_resources { struct ib_cq *c0; struct ib_xrcd *x0; @@ -509,6 +520,9 @@ struct mlx5_ib_resources { struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; + struct mlx5_ib_port_resources ports[2]; + /* Protects changes to the port resources */ + struct mutex mutex; }; struct mlx5_roce { @@ -754,6 +768,20 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int index); +/* GSI QP helper functions */ +struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr); +int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); +int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask); +int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -773,7 +801,7 @@ static inline u8 convert_access(int acc) static inline int is_qp1(enum ib_qp_type qp_type) { - return qp_type == IB_QPT_GSI; + return qp_type == MLX5_IB_QPT_HW_GSI; } #define MLX5_MAX_UMR_SHIFT 16 diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c8b12f9..85cf9c4 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -296,7 +296,7 @@ static int sq_overhead(struct ib_qp_init_attr *attr) sizeof(struct mlx5_wqe_eth_seg); /* fall through */ case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_datagram_seg); break; @@ -598,7 +598,7 @@ static int to_mlx5_st(enum ib_qp_type type) case IB_QPT_XRC_INI: case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; case IB_QPT_SMI: return MLX5_QP_ST_QP0; - case IB_QPT_GSI: return MLX5_QP_ST_QP1; + case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1; case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; case IB_QPT_RAW_PACKET: case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; @@ -1530,7 +1530,7 @@ static void get_cqs(struct mlx5_ib_qp *qp, break; case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: @@ -1693,7 +1693,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: case MLX5_IB_QPT_REG_UMR: qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) @@ -1722,6 +1722,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, break; + case IB_QPT_GSI: + return mlx5_ib_gsi_create_qp(pd, init_attr); + case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: case IB_QPT_MAX: @@ -1740,6 +1743,9 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp) struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); + if (unlikely(qp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_destroy_qp(qp); + destroy_qp_common(dev, mqp); kfree(mqp); @@ -2220,7 +2226,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if (ibqp->qp_type == IB_QPT_UD 
|| ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { @@ -2403,11 +2409,18 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); + + qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? + IB_QPT_GSI : ibqp->qp_type; + mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -2418,9 +2431,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); } - if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && - !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, - ll)) { + if (qp_type != MLX5_IB_QPT_REG_UMR && + !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", cur_state, new_state, ibqp->qp_type, attr_mask); goto out; @@ -3304,13 +3316,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; - struct mlx5_bf *bf = qp->bf; + struct mlx5_bf *bf; int uninitialized_var(size); - void *qend = qp->sq.qend; + void *qend; unsigned long flags; unsigned idx; int err = 0; @@ -3322,6 +3334,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u8 next_fence = 0; u8 fence; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); + + qp = to_mqp(ibqp); + bf = qp->bf; + qend = qp->sq.qend; + spin_lock_irqsave(&qp->sq.lock, flags); for (nreq = 0; wr; nreq++, wr = wr->next) { @@ -3482,7 +3501,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX5_IB_QPT_HW_GSI: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; @@ -3631,6 +3650,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int ind; int i; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); + spin_lock_irqsave(&qp->rq.lock, flags); ind = qp->rq.head & (qp->rq.wqe_cnt - 1); @@ -3951,6 +3973,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int err = 0; u8 raw_packet_qp_state; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, + qp_init_attr); + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* * Wait for any outstanding page faults, in case the user frees memory -- cgit v0.10.2 From ebab41cff4db96c42dfc9939d1c1715496bcf961 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:06 +0200 Subject: IB/mlx5: Create multiple transmission GSI QPs In order to send GSI MADs on different P_Keys, mlx5 needs different QPs to be created, each with a different P_Key set when the QP is modified to the INIT state. Create QPs for each non-zero P_Key in the P_Key table. 
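To make the intent concrete, here is a simplified sketch of the per-P_Key setup loop (illustrative only; create_ud_tx_qp() and move_to_rts() are stand-in names for the patch's create_gsi_ud_qp() and modify_to_rts() helpers, and locking and error handling are omitted):

        for (qp_index = 0; qp_index < num_pkeys; ++qp_index) {
                u16 pkey;

                if (ib_query_pkey(device, port_num, qp_index, &pkey) || !pkey)
                        continue;       /* skip empty (zero) P_Key table entries */

                qp = create_ud_tx_qp(gsi);              /* UD QP that stamps QP1 in the DETH */
                move_to_rts(qp, qp_index, port_num);    /* INIT (pkey_index = qp_index) -> RTR -> RTS */
                gsi->tx_qps[qp_index] = qp;
        }

Each transmission QP therefore carries exactly one P_Key, fixed by the table index it was created for.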
Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 7116554..91bd20e 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -40,6 +40,12 @@ struct mlx5_ib_gsi_qp { enum ib_sig_type sq_sig_type; /* Serialize qp state modifications */ struct mutex mutex; + int num_qps; + /* Protects access to the tx_qps. Post send operations synchronize + * with tx_qp creation in setup_qp(). + */ + spinlock_t lock; + struct ib_qp **tx_qps; }; static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) @@ -47,6 +53,11 @@ static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); } +static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); +} + struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr) { @@ -54,6 +65,8 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct mlx5_ib_gsi_qp *gsi; struct ib_qp_init_attr hw_init_attr = *init_attr; const u8 port_num = init_attr->port_num; + const int num_pkeys = pd->device->attrs.max_pkeys; + const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0; int ret; mlx5_ib_dbg(dev, "creating GSI QP\n"); @@ -69,6 +82,12 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, if (!gsi) return ERR_PTR(-ENOMEM); + gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL); + if (!gsi->tx_qps) { + ret = -ENOMEM; + goto err_free; + } + mutex_init(&gsi->mutex); mutex_lock(&dev->devr.mutex); @@ -77,8 +96,10 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, mlx5_ib_warn(dev, "GSI QP already exists on port %d\n", port_num); ret = -EBUSY; - goto err_free; + goto err_free_tx; } + gsi->num_qps = num_qps; + spin_lock_init(&gsi->lock); gsi->cap = init_attr->cap; gsi->sq_sig_type = init_attr->sq_sig_type; @@ -91,7 +112,7 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, mlx5_ib_warn(dev, "unable to create hardware GSI QP. 
error %ld\n", PTR_ERR(gsi->rx_qp)); ret = PTR_ERR(gsi->rx_qp); - goto err_free; + goto err_free_tx; } dev->devr.ports[init_attr->port_num - 1].gsi = gsi; @@ -100,8 +121,10 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, return &gsi->ibqp; -err_free: +err_free_tx: mutex_unlock(&dev->devr.mutex); + kfree(gsi->tx_qps); +err_free: kfree(gsi); return ERR_PTR(ret); } @@ -111,6 +134,7 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); const int port_num = gsi->port_num; + int qp_index; int ret; mlx5_ib_dbg(dev, "destroying GSI QP\n"); @@ -125,12 +149,143 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) } dev->devr.ports[port_num - 1].gsi = NULL; mutex_unlock(&dev->devr.mutex); + gsi->rx_qp = NULL; + + for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) { + if (!gsi->tx_qps[qp_index]) + continue; + WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index])); + gsi->tx_qps[qp_index] = NULL; + } + kfree(gsi->tx_qps); kfree(gsi); return 0; } +static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) +{ + struct ib_pd *pd = gsi->rx_qp->pd; + struct ib_qp_init_attr init_attr = { + .event_handler = gsi->rx_qp->event_handler, + .qp_context = gsi->rx_qp->qp_context, + .send_cq = gsi->rx_qp->send_cq, + .recv_cq = gsi->rx_qp->recv_cq, + .cap = { + .max_send_wr = gsi->cap.max_send_wr, + .max_send_sge = gsi->cap.max_send_sge, + .max_inline_data = gsi->cap.max_inline_data, + }, + .sq_sig_type = gsi->sq_sig_type, + .qp_type = IB_QPT_UD, + .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + }; + + return ib_create_qp(pd, &init_attr); +} + +static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, + u16 qp_index) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct ib_qp_attr attr; + int mask; + int ret; + + mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; + attr.qp_state = IB_QPS_INIT; + attr.pkey_index = qp_index; + attr.qkey = IB_QP1_QKEY; + attr.port_num = gsi->port_num; + ret = ib_modify_qp(qp, &attr, mask); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n", + qp->qp_num, ret); + return ret; + } + + attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n", + qp->qp_num, ret); + return ret; + } + + attr.qp_state = IB_QPS_RTS; + attr.sq_psn = 0; + ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n", + qp->qp_num, ret); + return ret; + } + + return 0; +} + +static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) +{ + struct ib_device *device = gsi->rx_qp->device; + struct mlx5_ib_dev *dev = to_mdev(device); + struct ib_qp *qp; + unsigned long flags; + u16 pkey; + int ret; + + ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey); + if (ret) { + mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n", + gsi->port_num, qp_index); + return; + } + + if (!pkey) { + mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n", + gsi->port_num, qp_index); + return; + } + + spin_lock_irqsave(&gsi->lock, flags); + qp = gsi->tx_qps[qp_index]; + spin_unlock_irqrestore(&gsi->lock, flags); + if (qp) { + mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. 
Skipping\n", + gsi->port_num, qp_index); + return; + } + + qp = create_gsi_ud_qp(gsi); + if (IS_ERR(qp)) { + mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n", + PTR_ERR(qp)); + return; + } + + ret = modify_to_rts(gsi, qp, qp_index); + if (ret) + goto err_destroy_qp; + + spin_lock_irqsave(&gsi->lock, flags); + WARN_ON_ONCE(gsi->tx_qps[qp_index]); + gsi->tx_qps[qp_index] = qp; + spin_unlock_irqrestore(&gsi->lock, flags); + + return; + +err_destroy_qp: + WARN_ON_ONCE(qp); +} + +static void setup_qps(struct mlx5_ib_gsi_qp *gsi) +{ + u16 qp_index; + + for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) + setup_qp(gsi, qp_index); +} + int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask) { @@ -142,6 +297,15 @@ int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, mutex_lock(&gsi->mutex); ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); + if (ret) { + mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret); + goto unlock; + } + + if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS) + setup_qps(gsi); + +unlock: mutex_unlock(&gsi->mutex); return ret; -- cgit v0.10.2 From 7722f47e71e58592a2ba4437d27c802ba1c64e08 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:07 +0200 Subject: IB/mlx5: Create GSI transmission QPs when P_Key table is changed Whenever the P_Key table is changed, we create the required GSI transmission QPs on-demand. Reviewed-by: Leon Romanovsky Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 91bd20e..1648f53 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -341,3 +341,13 @@ int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, return ib_post_recv(gsi->rx_qp, wr, bad_wr); } + +void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi) +{ + if (!gsi) + return; + + mutex_lock(&gsi->mutex); + setup_qps(gsi); + mutex_unlock(&gsi->mutex); +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0b30dc5..d4224fa 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1721,6 +1721,17 @@ static struct device_attribute *mlx5_class_attributes[] = { &dev_attr_reg_pages, }; +static void pkey_change_handler(struct work_struct *work) +{ + struct mlx5_ib_port_resources *ports = + container_of(work, struct mlx5_ib_port_resources, + pkey_change_work); + + mutex_lock(&ports->devr->mutex); + mlx5_ib_gsi_pkey_change(ports->gsi); + mutex_unlock(&ports->devr->mutex); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -1757,6 +1768,8 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, case MLX5_DEV_EVENT_PKEY_CHANGE: ibev.event = IB_EVENT_PKEY_CHANGE; port = (u8)param; + + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); break; case MLX5_DEV_EVENT_GUID_CHANGE: @@ -1966,6 +1979,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; + int port; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); @@ -2059,6 +2073,12 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { + INIT_WORK(&devr->ports[port].pkey_change_work, + pkey_change_handler); + 
devr->ports[port].devr = devr; + } + return 0; error5: @@ -2077,12 +2097,20 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { + struct mlx5_ib_dev *dev = + container_of(devr, struct mlx5_ib_dev, devr); + int port; + mlx5_ib_destroy_srq(devr->s1); mlx5_ib_destroy_srq(devr->s0); mlx5_ib_dealloc_xrcd(devr->x0); mlx5_ib_dealloc_xrcd(devr->x1); mlx5_ib_destroy_cq(devr->c0); mlx5_ib_dealloc_pd(devr->p0); + + /* Make sure no change P_Key work items are still executing */ + for (port = 0; port < dev->num_ports; ++port) + cancel_work_sync(&devr->ports[port].pkey_change_work); } static u32 get_core_cap_flags(struct ib_device *ibdev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c68a913..a8fc345 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -510,7 +510,9 @@ struct mlx5_mr_cache { struct mlx5_ib_gsi_qp; struct mlx5_ib_port_resources { + struct mlx5_ib_resources *devr; struct mlx5_ib_gsi_qp *gsi; + struct work_struct pkey_change_work; }; struct mlx5_ib_resources { @@ -781,6 +783,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); +void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); static inline void init_query_mad(struct ib_smp *mad) { -- cgit v0.10.2 From 25361e02c44873a17e0148d9d5c42fa2e938a019 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:08 +0200 Subject: IB/mlx5: Generate completions in software The GSI QP emulation requires also emulating completions for transmitted MADs. The CQ on which these completions are generated can also be used by the hardware, and the MAD layer is free to use any CQ of the device for the GSI QP. Add a method for generating software completions to each mlx5 CQ. Software completions are polled first, and generate calls to the completion handler callback if necessary. 
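As an illustration, a minimal sketch of how the GSI code can inject such a software completion using the new helper (mlx5_ib_generate_wc() is the prototype added by this patch; the wrapper function below is purely illustrative):

        static int report_send_done(struct mlx5_ib_gsi_qp *gsi, u64 wr_id)
        {
                struct ib_wc wc = {
                        .wr_id  = wr_id,
                        .status = IB_WC_SUCCESS,
                        .opcode = IB_WC_SEND,
                        .qp     = &gsi->ibqp,
                };

                /* Queues the WC on the CQ's wc_list; mlx5_ib_poll_cq() reports
                 * it before any hardware CQEs on the next poll, and the CQ's
                 * completion handler is scheduled if the CQ was armed for the
                 * next completion (or if the status indicates an error).
                 */
                return mlx5_ib_generate_wc(gsi->ibqp.send_cq, &wc);
        }
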
Reviewed-by: Leon Romanovsky Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 5ece9a8..2a9ad84 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -568,18 +568,44 @@ repoll: return 0; } +static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, + struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_ib_wc *soft_wc, *next; + int npolled = 0; + + list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) { + if (npolled >= num_entries) + break; + + mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", + cq->mcq.cqn); + + wc[npolled++] = soft_wc->wc; + list_del(&soft_wc->list); + kfree(soft_wc); + } + + return npolled; +} + int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_cq *cq = to_mcq(ibcq); struct mlx5_ib_qp *cur_qp = NULL; unsigned long flags; + int soft_polled = 0; int npolled; int err = 0; spin_lock_irqsave(&cq->lock, flags); - for (npolled = 0; npolled < num_entries; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + npolled); + if (unlikely(!list_empty(&cq->wc_list))) + soft_polled = poll_soft_wc(cq, num_entries, wc); + + for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { + err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); if (err) break; } @@ -590,7 +616,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) - return npolled; + return soft_polled + npolled; else return err; } @@ -598,16 +624,27 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; + struct mlx5_ib_cq *cq = to_mcq(ibcq); void __iomem *uar_page = mdev->priv.uuari.uars[0].map; + unsigned long irq_flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, irq_flags); + if (cq->notify_flags != IB_CQ_NEXT_COMP) + cq->notify_flags = flags & IB_CQ_SOLICITED_MASK; - mlx5_cq_arm(&to_mcq(ibcq)->mcq, + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list)) + ret = 1; + spin_unlock_irqrestore(&cq->lock, irq_flags); + + mlx5_cq_arm(&cq->mcq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 
MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, uar_page, MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock), to_mcq(ibcq)->mcq.cons_index); - return 0; + return ret; } static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, @@ -760,6 +797,14 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, &cq->db); } +static void notify_soft_wc_handler(struct work_struct *work) +{ + struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq, + notify_work); + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -810,6 +855,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, &index, &inlen); if (err) goto err_create; + + INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } cq->cqe_size = cqe_size; @@ -835,6 +882,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; + INIT_LIST_HEAD(&cq->wc_list); + if (context) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; @@ -1222,3 +1271,27 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) cq = to_mcq(ibcq); return cq->cqe_size; } + +/* Called from atomic context */ +int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc) +{ + struct mlx5_ib_wc *soft_wc; + struct mlx5_ib_cq *cq = to_mcq(ibcq); + unsigned long flags; + + soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC); + if (!soft_wc) + return -ENOMEM; + + soft_wc->wc = *wc; + spin_lock_irqsave(&cq->lock, flags); + list_add_tail(&soft_wc->list, &cq->wc_list); + if (cq->notify_flags == IB_CQ_NEXT_COMP || + wc->status != IB_WC_SUCCESS) { + cq->notify_flags = 0; + schedule_work(&cq->notify_work); + } + spin_unlock_irqrestore(&cq->lock, flags); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a8fc345..0142efb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -393,6 +393,14 @@ struct mlx5_ib_cq { struct ib_umem *resize_umem; int cqe_size; u32 create_flags; + struct list_head wc_list; + enum ib_cq_notify_flags notify_flags; + struct work_struct notify_work; +}; + +struct mlx5_ib_wc { + struct ib_wc wc; + struct list_head list; }; struct mlx5_ib_srq { @@ -785,6 +793,8 @@ int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); +int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); + static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; -- cgit v0.10.2 From ea6dc2036224aaee887f391a1ee8833bea18c68b Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:09 +0200 Subject: IB/mlx5: Reorder GSI completions The emulated GSI QP's send completions are generated by multiple hardware QPs, so their completions could arrive out of order with respect to the order their work request were submitted. Reorder the completions by keeping a list of the posted work request and their completions. A newly received completion from the hardware updates the list and marks its work request as completed. However, the completions are only reported to the client according to the list order. In order to support that, create a new private CQ to handle the hardware completions. 
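The ordering rule itself is simple. As a sketch (illustrative pseudo-kernel code, not the patch itself), with pi/ci as producer/consumer indices into the posted-order array of outstanding work requests, and signaled() standing in for the IB_SIGNAL_ALL_WR / IB_SEND_SIGNALED check done by generate_completions():

        /* called with the GSI lock held, after marking one entry completed */
        while (ci != pi && wrs[ci].completed) {
                if (signaled(&wrs[ci]))
                        mlx5_ib_generate_wc(gsi->ibqp.send_cq, &wrs[ci].wc);
                wrs[ci].completed = false;
                ci = (ci + 1) % max_send_wr;
        }

A completion that arrives early simply waits in the array until every request posted before it has completed as well.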
Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 1648f53..8d04062 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -32,6 +32,13 @@ #include "mlx5_ib.h" +struct mlx5_ib_gsi_wr { + struct ib_cqe cqe; + struct ib_wc wc; + int send_flags; + bool completed:1; +}; + struct mlx5_ib_gsi_qp { struct ib_qp ibqp; struct ib_qp *rx_qp; @@ -40,9 +47,13 @@ struct mlx5_ib_gsi_qp { enum ib_sig_type sq_sig_type; /* Serialize qp state modifications */ struct mutex mutex; + struct ib_cq *cq; + struct mlx5_ib_gsi_wr *outstanding_wrs; + u32 outstanding_pi, outstanding_ci; int num_qps; /* Protects access to the tx_qps. Post send operations synchronize - * with tx_qp creation in setup_qp(). + * with tx_qp creation in setup_qp(). Also protects the + * outstanding_wrs array and indices. */ spinlock_t lock; struct ib_qp **tx_qps; @@ -58,6 +69,57 @@ static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); } +static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index) +{ + return ++index % gsi->cap.max_send_wr; +} + +#define for_each_outstanding_wr(gsi, index) \ + for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \ + index = next_outstanding(gsi, index)) + +/* Call with gsi->lock locked */ +static void generate_completions(struct mlx5_ib_gsi_qp *gsi) +{ + struct ib_cq *gsi_cq = gsi->ibqp.send_cq; + struct mlx5_ib_gsi_wr *wr; + u32 index; + + for_each_outstanding_wr(gsi, index) { + wr = &gsi->outstanding_wrs[index]; + + if (!wr->completed) + break; + + if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR || + wr->send_flags & IB_SEND_SIGNALED) + WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); + + wr->completed = false; + } + + gsi->outstanding_ci = index; +} + +static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_gsi_qp *gsi = cq->cq_context; + struct mlx5_ib_gsi_wr *wr = + container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe); + u64 wr_id; + unsigned long flags; + + spin_lock_irqsave(&gsi->lock, flags); + wr->completed = true; + wr_id = wr->wc.wr_id; + wr->wc = *wc; + wr->wc.wr_id = wr_id; + wr->wc.qp = &gsi->ibqp; + + generate_completions(gsi); + spin_unlock_irqrestore(&gsi->lock, flags); +} + struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr) { @@ -88,6 +150,14 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, goto err_free; } + gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr, + sizeof(*gsi->outstanding_wrs), + GFP_KERNEL); + if (!gsi->outstanding_wrs) { + ret = -ENOMEM; + goto err_free_tx; + } + mutex_init(&gsi->mutex); mutex_lock(&dev->devr.mutex); @@ -96,7 +166,7 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, mlx5_ib_warn(dev, "GSI QP already exists on port %d\n", port_num); ret = -EBUSY; - goto err_free_tx; + goto err_free_wrs; } gsi->num_qps = num_qps; spin_lock_init(&gsi->lock); @@ -106,13 +176,23 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, gsi->ibqp.qp_num = 1; gsi->port_num = port_num; + gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0, + IB_POLL_SOFTIRQ); + if (IS_ERR(gsi->cq)) { + mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. 
error %ld\n", + PTR_ERR(gsi->cq)); + ret = PTR_ERR(gsi->cq); + goto err_free_wrs; + } + hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI; + hw_init_attr.send_cq = gsi->cq; gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); if (IS_ERR(gsi->rx_qp)) { mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", PTR_ERR(gsi->rx_qp)); ret = PTR_ERR(gsi->rx_qp); - goto err_free_tx; + goto err_destroy_cq; } dev->devr.ports[init_attr->port_num - 1].gsi = gsi; @@ -121,8 +201,12 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, return &gsi->ibqp; -err_free_tx: +err_destroy_cq: + ib_free_cq(gsi->cq); +err_free_wrs: mutex_unlock(&dev->devr.mutex); + kfree(gsi->outstanding_wrs); +err_free_tx: kfree(gsi->tx_qps); err_free: kfree(gsi); @@ -158,6 +242,9 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) gsi->tx_qps[qp_index] = NULL; } + ib_free_cq(gsi->cq); + + kfree(gsi->outstanding_wrs); kfree(gsi->tx_qps); kfree(gsi); @@ -170,7 +257,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) struct ib_qp_init_attr init_attr = { .event_handler = gsi->rx_qp->event_handler, .qp_context = gsi->rx_qp->qp_context, - .send_cq = gsi->rx_qp->send_cq, + .send_cq = gsi->cq, .recv_cq = gsi->rx_qp->recv_cq, .cap = { .max_send_wr = gsi->cap.max_send_wr, @@ -326,12 +413,69 @@ int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, return ret; } +/* Call with gsi->lock locked */ +static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, + struct ib_ud_wr *wr, struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + struct mlx5_ib_gsi_wr *gsi_wr; + + if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) { + mlx5_ib_warn(dev, "no available GSI work request.\n"); + return -ENOMEM; + } + + gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi]; + gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi); + + if (!wc) { + memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc)); + gsi_wr->wc.pkey_index = wr->pkey_index; + gsi_wr->wc.wr_id = wr->wr.wr_id; + } else { + gsi_wr->wc = *wc; + gsi_wr->completed = true; + } + + gsi_wr->cqe.done = &handle_single_completion; + wr->wr.wr_cqe = &gsi_wr->cqe; + + return 0; +} + int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + unsigned long flags; + int ret; + + for (; wr; wr = wr->next) { + struct ib_ud_wr cur_wr = *ud_wr(wr); + + cur_wr.wr.next = NULL; - return ib_post_send(gsi->rx_qp, wr, bad_wr); + spin_lock_irqsave(&gsi->lock, flags); + ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); + if (ret) + goto err; + + ret = ib_post_send(gsi->rx_qp, &cur_wr.wr, bad_wr); + if (ret) { + /* Undo the effect of adding the outstanding wr */ + gsi->outstanding_pi = (gsi->outstanding_pi - 1) % + gsi->cap.max_send_wr; + goto err; + } + spin_unlock_irqrestore(&gsi->lock, flags); + } + + return 0; + +err: + spin_unlock_irqrestore(&gsi->lock, flags); + *bad_wr = wr; + return ret; } int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, -- cgit v0.10.2 From 83cae2aff53960ab6cf5bb82654201ce43b77fb6 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:10 +0200 Subject: IB/mlx5: Pick the right GSI transmission QP for sending Pick the QP to use according to the wr.ud.pkey_index field in the work request. If the QP doesn't exist, it means the P_Key is zero and the packet would have been dropped, so just generate a completion and move on. 
Reviewed-by: Leon Romanovsky Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 8d04062..938f6dd 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -443,10 +443,47 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, return 0; } +/* Call with gsi->lock locked */ +static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, + struct ib_ud_wr *wr) +{ + struct ib_wc wc = { + { .wr_id = wr->wr.wr_id }, + .status = IB_WC_SUCCESS, + .opcode = IB_WC_SEND, + .qp = &gsi->ibqp, + }; + int ret; + + ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc); + if (ret) + return ret; + + generate_completions(gsi); + + return 0; +} + +/* Call with gsi->lock locked */ +static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) +{ + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + int qp_index = wr->pkey_index; + + if (!mlx5_ib_deth_sqpn_cap(dev)) + return gsi->rx_qp; + + if (qp_index >= gsi->num_qps) + return NULL; + + return gsi->tx_qps[qp_index]; +} + int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct ib_qp *tx_qp; unsigned long flags; int ret; @@ -456,11 +493,20 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, cur_wr.wr.next = NULL; spin_lock_irqsave(&gsi->lock, flags); + tx_qp = get_tx_qp(gsi, &cur_wr); + if (!tx_qp) { + ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr); + if (ret) + goto err; + spin_unlock_irqrestore(&gsi->lock, flags); + continue; + } + ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); if (ret) goto err; - ret = ib_post_send(gsi->rx_qp, &cur_wr.wr, bad_wr); + ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ gsi->outstanding_pi = (gsi->outstanding_pi - 1) % -- cgit v0.10.2 From ebe6ccc53ff06a3782b95547eecb393222de057f Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:11 +0200 Subject: IB/mlx5: Eliminate GSI RX QP's send buffers Now that the transmission of GSI MADs is done with the special transmission QPs, eliminate the send buffers in the GSI receive QP. Reviewed-by: Leon Romanovsky Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 938f6dd..53e03c8 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -187,6 +187,11 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI; hw_init_attr.send_cq = gsi->cq; + if (num_qps) { + hw_init_attr.cap.max_send_wr = 0; + hw_init_attr.cap.max_send_sge = 0; + hw_init_attr.cap.max_inline_data = 0; + } gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); if (IS_ERR(gsi->rx_qp)) { mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", -- cgit v0.10.2 From 84424a7fc793979da12992cfe5c2f5f73a3e8725 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:12 +0200 Subject: IB/cma: Print warning on different inner and header P_Keys Commit 4c21b5bcef73 ("IB/cma: Add net_dev and private data checks to RDMA CM") added checks for incoming RDMA CM requests that they can be matched to a netdev based on the P_Key in the BTH of the request. This behavior was reverted in commit ab3964ad2acf ("IB/cma: Use inner P_Key to determine netdev"), since the mlx5 and ipath drivers didn't send the correct value in the BTH P_Key. 
Since the ipath driver was removed, and the mlx5 driver can now send GSI packets on different P_Keys, we could revert the patch to let the rdma_cm module look on the BTH P_Key when deciding to what netdev a packet belongs. However, that still breaks compatibility with the older drivers. Change the behavior to print a warning when receiving a request that has a different BTH P_Key and inner payload P_Key. In the future, after users have seen the warnings and upgraded their setups, remove the warning and block these requests. Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 9729639..7eace1f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1206,6 +1206,10 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event, req->has_gid = true; req->service_id = req_param->primary_path->service_id; req->pkey = be16_to_cpu(req_param->primary_path->pkey); + if (req->pkey != req_param->bth_pkey) + pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" + "RDMA CMA: in the future this may cause the request to be dropped\n", + req_param->bth_pkey, req->pkey); break; case IB_CM_SIDR_REQ_RECEIVED: req->device = sidr_param->listen_id->device; @@ -1213,6 +1217,10 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event, req->has_gid = false; req->service_id = sidr_param->service_id; req->pkey = sidr_param->pkey; + if (req->pkey != sidr_param->bth_pkey) + pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" + "RDMA CMA: in the future this may cause the request to be dropped\n", + sidr_param->bth_pkey, req->pkey); break; default: return -EINVAL; -- cgit v0.10.2 From 395a8e4c32ea2d032cf803f52f2e00983f91722d Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 29 Feb 2016 16:46:50 +0200 Subject: IB/mlx5: Refactoring register MR code In order to add re-registration of memory region, some logic was extracted to separate functions: - ODP related logic. - Some of the UMR WQE preparation code. - DMA mapping. - Umem creation. - Creating MKey using FW interface. - MR fields assignments after successful creation. Signed-off-by: Noa Osherovich Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6000f7a..9d6dade 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -77,6 +77,34 @@ static int order2idx(struct mlx5_ib_dev *dev, int order) return order - cache->ent[0].order; } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +static void update_odp_mr(struct mlx5_ib_mr *mr) +{ + if (mr->umem->odp_data) { + /* + * This barrier prevents the compiler from moving the + * setting of umem->odp_data->private to point to our + * MR, before reg_umr finished, to ensure that the MR + * initialization have finished before starting to + * handle invalidations. + */ + smp_wmb(); + mr->umem->odp_data->private = mr; + /* + * Make sure we will see the new + * umem->odp_data->private value in the invalidation + * routines, before we can get page faults on the + * MR. Page faults can happen once we put the MR in + * the tree, below this line. Without the barrier, + * there can be a fault handling and an invalidation + * before umem->odp_data->private == mr is visible to + * the invalidation handler. 
+ */ + smp_wmb(); + } +} +#endif + static void reg_mr_callback(int status, void *context) { struct mlx5_ib_mr *mr = context; @@ -693,10 +721,40 @@ static int use_umr(int order) return order <= MLX5_MAX_UMR_SHIFT; } -static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, - struct ib_sge *sg, u64 dma, int n, u32 key, - int page_shift, u64 virt_addr, u64 len, - int access_flags) +static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int npages, int page_shift, int *size, + __be64 **mr_pas, dma_addr_t *dma) +{ + __be64 *pas; + struct device *ddev = dev->ib_dev.dma_device; + + /* + * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the pas array, we allocate + * a little more. + */ + *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); + *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); + if (!(*mr_pas)) + return -ENOMEM; + + pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN); + mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); + /* Clear padding after the actual pages. */ + memset(pas + npages, 0, *size - npages * sizeof(u64)); + + *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, *dma)) { + kfree(*mr_pas); + return -ENOMEM; + } + + return 0; +} + +static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -706,7 +764,6 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, sg->lkey = dev->umrc.pd->local_dma_lkey; wr->next = NULL; - wr->send_flags = 0; wr->sg_list = sg; if (n) wr->num_sge = 1; @@ -718,6 +775,19 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, umrwr->npages = n; umrwr->page_shift = page_shift; umrwr->mkey = key; +} + +static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift, u64 virt_addr, u64 len, + int access_flags) +{ + struct mlx5_umr_wr *umrwr = umr_wr(wr); + + prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift); + + wr->send_flags = 0; + umrwr->target.virt_addr = virt_addr; umrwr->length = len; umrwr->access_flags = access_flags; @@ -734,6 +804,31 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, umrwr->mkey = key; } +static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, + int access_flags, int *npages, + int *page_shift, int *ncont, int *order) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + if (IS_ERR(umem)) { + mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); + return (void *)umem; + } + + mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order); + if (!*npages) { + mlx5_ib_warn(dev, "avoid zero region\n"); + ib_umem_release(umem); + return ERR_PTR(-EINVAL); + } + + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", + *npages, *ncont, *order, *page_shift); + + return umem; +} + void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) { struct mlx5_ib_umr_context *context; @@ -770,7 +865,6 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct ib_sge sg; int size; __be64 *mr_pas; - __be64 *pas; dma_addr_t dma; int err = 0; int i; @@ -790,26 +884,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, if (!mr) return 
ERR_PTR(-EAGAIN); - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the pas array, we allocate - * a little more. */ - size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); - mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); - if (!mr_pas) { - err = -ENOMEM; + err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas, + &dma); + if (err) goto free_mr; - } - - pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN); - mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); - /* Clear padding after the actual pages. */ - memset(pas + npages, 0, size - npages * sizeof(u64)); - - dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - err = -ENOMEM; - goto free_pas; - } memset(&umrwr, 0, sizeof(umrwr)); umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; @@ -840,7 +918,6 @@ unmap_dma: up(&umrc->sem); dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); -free_pas: kfree(mr_pas); free_mr: @@ -974,10 +1051,14 @@ free_pas: } #endif -static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, - u64 length, struct ib_umem *umem, - int npages, int page_shift, - int access_flags) +/* + * If ibmr is NULL it will be allocated by reg_create. + * Else, the given ibmr will be used. + */ +static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, + u64 virt_addr, u64 length, + struct ib_umem *umem, int npages, + int page_shift, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_create_mkey_mbox_in *in; @@ -986,7 +1067,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - mr = kzalloc(sizeof(*mr), GFP_KERNEL); + mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -1032,11 +1113,22 @@ err_2: kvfree(in); err_1: - kfree(mr); + if (!ibmr) + kfree(mr); return ERR_PTR(err); } +static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, + int npages, u64 length, int access_flags) +{ + mr->npages = npages; + atomic_add(npages, &dev->mdev->priv.reg_pages); + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.length = length; +} + struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -1052,22 +1144,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - umem = ib_umem_get(pd->uobject->context, start, length, access_flags, - 0); - if (IS_ERR(umem)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); - return (void *)umem; - } + umem = mr_umem_get(pd, start, length, access_flags, &npages, + &page_shift, &ncont, &order); - mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); - if (!npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - err = -EINVAL; - goto error; - } - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - npages, ncont, order, page_shift); + if (IS_ERR(umem)) + return (void *)umem; if (use_umr(order)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, @@ -1083,8 +1164,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } if (!mr) - mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, - access_flags); + mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, + 
page_shift, access_flags); if (IS_ERR(mr)) { err = PTR_ERR(mr); @@ -1094,34 +1175,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); mr->umem = umem; - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + set_mr_fileds(dev, mr, npages, length, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (umem->odp_data) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - mr->umem->odp_data->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. - */ - smp_wmb(); - } + update_odp_mr(mr); #endif return &mr->ibmr; -- cgit v0.10.2 From 56e11d628c5d0553d9fc2ca1855144970e6b9eb6 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 29 Feb 2016 16:46:51 +0200 Subject: IB/mlx5: Added support for re-registration of MRs This patch adds support for re-registration of memory regions in MLX5. The functionality is basically the same as deregister followed by register, but attempts to reuse the existing resources as much as possible. Original memory keys are kept if possible, saving the need to communicate new ones to remote peers. Signed-off-by: Noa Osherovich Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d4224fa..16f7d0b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2233,6 +2233,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | @@ -2293,6 +2294,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; + dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0142efb..f84ec2b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -162,6 +162,11 @@ struct mlx5_ib_flow_db { #define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) #define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) + +#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3) +#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4) +#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END + #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 /* * IB_QPT_GSI creates the software wrapper around GSI, and 
MLX5_IB_QPT_HW_GSI @@ -453,6 +458,7 @@ struct mlx5_ib_mr { struct mlx5_core_sig_ctx *sig; int live; void *descs_alloc; + int access_flags; /* Needed for rereg MR */ }; struct mlx5_ib_umr_context { @@ -689,6 +695,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap); +int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9d6dade..cf26cd1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -77,6 +77,12 @@ static int order2idx(struct mlx5_ib_dev *dev, int order) return order - cache->ent[0].order; } +static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) +{ + return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= + length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); +} + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static void update_odp_mr(struct mlx5_ib_mr *mr) { @@ -1127,6 +1133,7 @@ static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->ibmr.lkey = mr->mmr.key; mr->ibmr.rkey = mr->mmr.key; mr->ibmr.length = length; + mr->access_flags = access_flags; } struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -1222,6 +1229,167 @@ error: return err; } +static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, + u64 length, int npages, int page_shift, int order, + int access_flags, int flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_ib_umr_context umr_context; + struct ib_send_wr *bad; + struct mlx5_umr_wr umrwr = {}; + struct ib_sge sg; + struct umr_common *umrc = &dev->umrc; + dma_addr_t dma = 0; + __be64 *mr_pas = NULL; + int size; + int err; + + umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; + + if (flags & IB_MR_REREG_TRANS) { + err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size, + &mr_pas, &dma); + if (err) + return err; + + umrwr.target.virt_addr = virt_addr; + umrwr.length = length; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + } + + prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + page_shift); + + if (flags & IB_MR_REREG_PD) { + umrwr.pd = pd; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD; + } + + if (flags & IB_MR_REREG_ACCESS) { + umrwr.access_flags = access_flags; + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS; + } + + mlx5_ib_init_umr_context(&umr_context); + + /* post send request to UMR QP */ + down(&umrc->sem); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); + + if (err) { + mlx5_ib_warn(dev, "post send failed, err %d\n", err); + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed (%u)\n", + umr_context.status); + err = -EFAULT; + } + } + + up(&umrc->sem); + if (flags & IB_MR_REREG_TRANS) { + dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + kfree(mr_pas); + } + return err; +} + +int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int new_access_flags, + struct ib_pd *new_pd, struct ib_udata *udata) +{ + 
struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); + struct mlx5_ib_mr *mr = to_mmr(ib_mr); + struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; + int access_flags = flags & IB_MR_REREG_ACCESS ? + new_access_flags : + mr->access_flags; + u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; + u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; + int page_shift = 0; + int npages = 0; + int ncont = 0; + int order = 0; + int err; + + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, virt_addr, length, access_flags); + + if (flags != IB_MR_REREG_PD) { + /* + * Replace umem. This needs to be done whether or not UMR is + * used. + */ + flags |= IB_MR_REREG_TRANS; + ib_umem_release(mr->umem); + mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages, + &page_shift, &ncont, &order); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + mr->umem = NULL; + return err; + } + } + + if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { + /* + * UMR can't be used - MKey needs to be replaced. + */ + if (mr->umred) { + err = unreg_umr(dev, mr); + if (err) + mlx5_ib_warn(dev, "Failed to unregister MR\n"); + } else { + err = destroy_mkey(dev, mr); + if (err) + mlx5_ib_warn(dev, "Failed to destroy MKey\n"); + } + if (err) + return err; + + mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, + page_shift, access_flags); + + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->umred = 0; + } else { + /* + * Send a UMR WQE + */ + err = rereg_umr(pd, mr, addr, len, npages, page_shift, + order, access_flags, flags); + if (err) { + mlx5_ib_warn(dev, "Failed to rereg UMR\n"); + return err; + } + } + + if (flags & IB_MR_REREG_PD) { + ib_mr->pd = pd; + mr->mmr.pd = to_mpd(pd)->pdn; + } + + if (flags & IB_MR_REREG_ACCESS) + mr->access_flags = access_flags; + + if (flags & IB_MR_REREG_TRANS) { + atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + set_mr_fileds(dev, mr, npages, len, access_flags); + mr->mmr.iova = addr; + mr->mmr.size = len; + } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + update_odp_mr(mr); +#endif + + return 0; +} + static int mlx5_alloc_priv_descs(struct ib_device *device, struct mlx5_ib_mr *mr, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 85cf9c4..295eb2a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2678,6 +2678,44 @@ static __be64 get_umr_update_mtt_mask(void) return cpu_to_be64(result); } +static __be64 get_umr_update_translation_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_access_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_pd_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_PD | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr) { @@ -2696,9 +2734,15 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = get_umr_update_mtt_mask(); umr->bsf_octowords = get_klm_octo(umrwr->target.offset); umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; - } else { - umr->mkey_mask = 
get_umr_reg_mr_mask(); } + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) + umr->mkey_mask |= get_umr_update_translation_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS) + umr->mkey_mask |= get_umr_update_access_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) + umr->mkey_mask |= get_umr_update_pd_mask(); + if (!umr->mkey_mask) + umr->mkey_mask = get_umr_reg_mr_mask(); } else { umr->mkey_mask = get_umr_unreg_mr_mask(); } @@ -2750,7 +2794,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w seg->flags = convert_access(umrwr->access_flags); if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) { - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + if (umrwr->pd) + seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); seg->start_addr = cpu_to_be64(umrwr->target.virt_addr); } seg->len = cpu_to_be64(umrwr->length); -- cgit v0.10.2 From a606b0f6691daf861482f8b77326f672238ffbfd Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 29 Feb 2016 18:05:28 +0200 Subject: net/mlx5: Refactor mlx5_core_mr to mkey Mlx5's mkey mechanism is also used for memory windows. The current code base uses MR (memory region) naming, which is inaccurate. Changing MR to mkey in order to represent its different usages more accurately. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2a9ad84..a00ba44 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -434,7 +434,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mr *mmr; + struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; @@ -539,17 +539,17 @@ repoll: case MLX5_CQE_SIG_ERR: sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; - read_lock(&dev->mdev->priv.mr_table.lock); - mmr = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmr)) { - read_unlock(&dev->mdev->priv.mr_table.lock); + read_lock(&dev->mdev->priv.mkey_table.lock); + mmkey = __mlx5_mr_lookup(dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmkey)) { + read_unlock(&dev->mdev->priv.mkey_table.lock); mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); return -EINVAL; } - mr = to_mibmr(mmr); + mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; mr->sig->sigerr_count++; @@ -561,7 +561,7 @@ repoll: mr->sig->err_item.expected, mr->sig->err_item.actual); - read_unlock(&dev->mdev->priv.mr_table.lock); + read_unlock(&dev->mdev->priv.mkey_table.lock); goto repoll; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f84ec2b..4167d67 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -446,7 +446,7 @@ struct mlx5_ib_mr { int ndescs; int max_descs; int desc_size; - struct mlx5_core_mr mmr; + struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; @@ -603,9 +603,9 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } -static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) { - return 
container_of(mmr, struct mlx5_ib_mr, mmr); + return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index cf26cd1..399e2b5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -57,7 +57,7 @@ static int clean_mr(struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); + int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. */ @@ -120,7 +120,7 @@ static void reg_mr_callback(int status, void *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; int err; spin_lock_irqsave(&ent->lock, flags); @@ -147,7 +147,7 @@ static void reg_mr_callback(int status, void *context) spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; cache->last_add = jiffies; @@ -158,10 +158,10 @@ static void reg_mr_callback(int status, void *context) spin_unlock_irqrestore(&ent->lock, flags); write_lock_irqsave(&table->lock, flags); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), - &mr->mmr); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey); if (err) - pr_err("Error inserting to mr tree. 0x%x\n", -err); + pr_err("Error inserting to mkey tree. 
0x%x\n", -err); write_unlock_irqrestore(&table->lock, flags); } @@ -202,7 +202,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), reg_mr_callback, mr, &mr->out); if (err) { @@ -691,14 +691,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); seg->start_addr = 0; - err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_in; kfree(in); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; return &mr->ibmr; @@ -897,7 +897,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, memset(&umrwr, 0, sizeof(umrwr)); umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift, virt_addr, len, access_flags); mlx5_ib_init_umr_context(&umr_context); @@ -914,9 +914,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, } } - mr->mmr.iova = virt_addr; - mr->mmr.size = len; - mr->mmr.pd = to_mpd(pd)->pdn; + mr->mmkey.iova = virt_addr; + mr->mmkey.size = len; + mr->mmkey.pd = to_mpd(pd)->pdn; mr->live = 1; @@ -1027,7 +1027,7 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, wr.wr.opcode = MLX5_IB_WR_UMR; wr.npages = sg.length / sizeof(u64); wr.page_shift = PAGE_SHIFT; - wr.mkey = mr->mmr.key; + wr.mkey = mr->mmkey.key; wr.target.offset = start_page_index; mlx5_ib_init_umr_context(&umr_context); @@ -1100,7 +1100,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL, NULL, NULL); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); @@ -1111,7 +1111,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, mr->live = 1; kvfree(in); - mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); + mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); return mr; @@ -1130,8 +1130,8 @@ static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, { mr->npages = npages; atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; mr->access_flags = access_flags; } @@ -1179,7 +1179,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto error; } - mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; set_mr_fileds(dev, mr, npages, length, access_flags); @@ -1205,7 +1205,7 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) memset(&umrwr.wr, 0, sizeof(umrwr)); umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key); + prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); @@ 
-1259,7 +1259,7 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; } - prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift); if (flags & IB_MR_REREG_PD) { @@ -1371,7 +1371,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (flags & IB_MR_REREG_PD) { ib_mr->pd = pd; - mr->mmr.pd = to_mpd(pd)->pdn; + mr->mmkey.pd = to_mpd(pd)->pdn; } if (flags & IB_MR_REREG_ACCESS) @@ -1380,8 +1380,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (flags & IB_MR_REREG_TRANS) { atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); set_mr_fileds(dev, mr, npages, len, access_flags); - mr->mmr.iova = addr; - mr->mmr.size = len; + mr->mmkey.iova = addr; + mr->mmkey.size = len; } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); @@ -1461,7 +1461,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - mr->mmr.key, err); + mr->mmkey.key, err); return err; } } else { @@ -1587,13 +1587,13 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, } in->seg.flags = MLX5_PERM_UMR_EN | access_mode; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_destroy_psv; - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; kfree(in); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b8d7636..34e79e7 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -142,13 +142,13 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, u32 key) { u32 base_key = mlx5_base_mkey(key); - struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key); - struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr); + struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key); + struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mmr || mmr->key != key || !mr->live) + if (!mmkey || mmkey->key != key || !mr->live) return NULL; - return container_of(mmr, struct mlx5_ib_mr, mmr); + return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, @@ -232,7 +232,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp, io_virt += pfault->mpfault.bytes_committed; bcnt -= pfault->mpfault.bytes_committed; - start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT; + start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT; if (mr->umem->writable) access_mask |= ODP_WRITE_ALLOWED_BIT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index aac071a..6ef0bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -515,7 +515,7 @@ struct mlx5e_priv { struct mlx5_uar cq_uar; u32 pdn; u32 tdn; - struct mlx5_core_mr mr; + struct mlx5_core_mkey mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d4e1c30..43a1489 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -982,7 +982,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mr.key); + c->mkey_be = cpu_to_be32(priv->mkey.key); c->num_tc = priv->params.num_tc; mlx5e_build_channeltc_to_txq_map(priv, ix); @@ -2194,7 +2194,7 @@ static void mlx5e_build_netdev(struct net_device *netdev) } static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mr *mr) + struct mlx5_core_mkey *mkey) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_create_mkey_mbox_in *in; @@ -2210,7 +2210,7 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, NULL); kvfree(in); @@ -2259,7 +2259,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_dealloc_pd; } - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); + err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); if (err) { mlx5_core_err(mdev, "create mkey failed, %d\n", err); goto err_dealloc_transport_domain; @@ -2333,7 +2333,7 @@ err_destroy_tises: mlx5e_destroy_tises(priv); err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &priv->mr); + mlx5_core_destroy_mkey(mdev, &priv->mkey); err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, priv->tdn); @@ -2367,7 +2367,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); - mlx5_core_destroy_mkey(priv->mdev, &priv->mr); + mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1545a94..0916bbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1117,7 +1117,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_cq_table(dev); mlx5_init_qp_table(dev); mlx5_init_srq_table(dev); - mlx5_init_mr_table(dev); + mlx5_init_mkey_table(dev); err = mlx5_init_fs(dev); if (err) { @@ -1164,7 +1164,7 @@ err_sriov: err_reg_dev: mlx5_cleanup_fs(dev); err_fs: - mlx5_cleanup_mr_table(dev); + mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); @@ -1237,7 +1237,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) #endif mlx5_cleanup_fs(dev); - mlx5_cleanup_mr_table(dev); + mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 6fa22b5..77a72939 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,25 +36,26 @@ #include #include "mlx5_core.h" -void mlx5_init_mr_table(struct mlx5_core_dev *dev) +void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; 
memset(table, 0, sizeof(*table)); rwlock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); } -void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { } -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; struct mlx5_create_mkey_mbox_out lout; int err; u8 key; @@ -83,34 +84,35 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return mlx5_cmd_status_to_err(&lout.hdr); } - mr->iova = be64_to_cpu(in->seg.start_addr); - mr->size = be64_to_cpu(in->seg.len); - mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; - mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mkey->iova = be64_to_cpu(in->seg.start_addr); + mkey->size = be64_to_cpu(in->seg.len); + mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; + mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - be32_to_cpu(lout.mkey), key, mr->key); + be32_to_cpu(lout.mkey), key, mkey->key); - /* connect to MR tree */ + /* connect to mkey tree */ write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); write_unlock_irq(&table->lock); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", - mlx5_base_mkey(mr->key), err); - mlx5_core_destroy_mkey(dev, mr); + mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_base_mkey(mkey->key), err); + mlx5_core_destroy_mkey(dev, mkey); } return err; } EXPORT_SYMBOL(mlx5_core_create_mkey); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; struct mlx5_destroy_mkey_mbox_in in; struct mlx5_destroy_mkey_mbox_out out; - struct mlx5_core_mr *deleted_mr; + struct mlx5_core_mkey *deleted_mkey; unsigned long flags; int err; @@ -118,16 +120,16 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) memset(&out, 0, sizeof(out)); write_lock_irqsave(&table->lock, flags); - deleted_mr = radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); + deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); write_unlock_irqrestore(&table->lock, flags); - if (!deleted_mr) { - mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", - mlx5_base_mkey(mr->key)); + if (!deleted_mkey) { + mlx5_core_warn(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_base_mkey(mkey->key)); return -ENOENT; } in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; @@ -139,7 +141,7 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) } EXPORT_SYMBOL(mlx5_core_destroy_mkey); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int 
mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_query_mkey_mbox_out *out, int outlen) { struct mlx5_query_mkey_mbox_in in; @@ -149,7 +151,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, memset(out, 0, outlen); in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); if (err) return err; @@ -161,7 +163,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, } EXPORT_SYMBOL(mlx5_core_query_mkey); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey) { struct mlx5_query_special_ctxs_mbox_in in; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8edcd08..9108904 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -338,7 +338,7 @@ struct mlx5_core_sig_ctx { u32 sigerr_count; }; -struct mlx5_core_mr { +struct mlx5_core_mkey { u64 iova; u64 size; u32 key; @@ -426,7 +426,7 @@ struct mlx5_srq_table { struct radix_tree_root tree; }; -struct mlx5_mr_table { +struct mlx5_mkey_table { /* protect radix tree */ rwlock_t lock; @@ -484,9 +484,9 @@ struct mlx5_priv { struct mlx5_cq_table cq_table; /* end: cq staff */ - /* start: mr staff */ - struct mlx5_mr_table mr_table; - /* end: mr staff */ + /* start: mkey staff */ + struct mlx5_mkey_table mkey_table; + /* end: mkey staff */ /* start: alloc staff */ /* protect buffer alocation according to numa node */ @@ -739,16 +739,18 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); -void mlx5_init_mr_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +void mlx5_init_mkey_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey); +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_query_mkey_mbox_out *out, int outlen); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index e5bbcf0..cf031a3 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -622,9 +622,9 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } -static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) +static inline struct 
mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) { - return radix_tree_lookup(&dev->priv.mr_table.tree, key); + return radix_tree_lookup(&dev->priv.mkey_table.tree, key); } struct mlx5_page_fault_resume_mbox_in { -- cgit v0.10.2 From b2a239df4e65fe35240ddf3e5f9f31335c90589b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 29 Feb 2016 18:05:29 +0200 Subject: IB/core: Add vendor's specific data to alloc mw Passing udata to the vendor's driver in order to pass data from the user-space driver to the kernel-space driver. This data will be used in downstream patches. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6ffc9c4..2bf751e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1174,6 +1174,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mw *mw; + struct ib_udata udata; int ret; if (out_len < sizeof(resp)) @@ -1195,7 +1196,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, goto err_free; } - mw = pd->device->alloc_mw(pd, cmd.mw_type); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); + + mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); goto err_put; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 2734820..42a7b89 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -657,7 +657,8 @@ err: return ERR_PTR(err); } -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_pd *php; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index fb2de75..423a3a9 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -961,7 +961,8 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents); int c4iw_dealloc_mw(struct ib_mw *mw); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 7849890..766d39c 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "iw_cxgb4.h" @@ -552,7 +553,8 @@ err: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 52ce7b0..1eca01c 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -711,7 +711,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct 
ib_mr *mr); -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 242b94e..ce0b5aa 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -32,6 +32,7 @@ */ #include +#include #include "mlx4_ib.h" @@ -334,7 +335,8 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mw *mw; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8c4daf7..5af19b4 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -56,7 +56,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr); /** * nes_alloc_mw */ -static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type) +static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type, + struct ib_udata *udata) { struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c..3f79070 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1808,7 +1808,8 @@ struct ib_device { struct scatterlist *sg, int sg_nents); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, - enum ib_mw_type type); + enum ib_mw_type type, + struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, -- cgit v0.10.2 From d2370e0a573e5c5ea9c96373558727abb3ea71f7 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 29 Feb 2016 18:05:30 +0200 Subject: IB/mlx5: Add memory windows allocation support This patch adds user-space support for memory windows allocation and deallocation. It also exposes the supported types via query_device_caps verb. 
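A minimal user-space sketch of the flow this enables, assuming a libibverbs build that exposes the memory-window verbs; the helper name is illustrative and not part of this patch:

    #include <infiniband/verbs.h>

    /* Illustrative helper: allocate a type-2 memory window on @pd only if
     * the device advertises it via the capability bits exported here. */
    static struct ibv_mw *alloc_type2_mw(struct ibv_context *ctx,
                                         struct ibv_pd *pd)
    {
            struct ibv_device_attr attr;

            if (ibv_query_device(ctx, &attr))
                    return NULL;
            if (!(attr.device_cap_flags & IBV_DEVICE_MEM_WINDOW_TYPE_2B))
                    return NULL;    /* device does not report type-2B MWs */

            /* Reaches mlx5_ib_alloc_mw() through the ALLOC_MW uverb. */
            return ibv_alloc_mw(pd, IBV_MW_TYPE_2);
    }

The window is later bound to a registered MR from the send queue and released with ibv_dealloc_mw(), which lands in mlx5_ib_dealloc_mw() below.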
Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Tested-by: Max Gurtovoy Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 16f7d0b..4d9b7cc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -487,6 +487,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; + if (MLX5_CAP_GEN(mdev, imaicl)) { + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_WINDOW_TYPE_2B; + props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; @@ -2306,6 +2311,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + if (MLX5_CAP_GEN(mdev, imaicl)) { + dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; + dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + } + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4167d67..648d2e2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -43,6 +43,7 @@ #include #include #include +#include #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -461,6 +462,11 @@ struct mlx5_ib_mr { int access_flags; /* Needed for rereg MR */ }; +struct mlx5_ib_mw { + struct ib_mw ibmw; + struct mlx5_core_mkey mmkey; +}; + struct mlx5_ib_umr_context { enum ib_wc_status status; struct completion done; @@ -633,6 +639,11 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx5_ib_mr, ibmr); } +static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx5_ib_mw, ibmw); +} + struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; @@ -693,6 +704,9 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); +int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 399e2b5..70a047d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -40,6 +40,7 @@ #include #include #include "mlx5_ib.h" +#include "user.h" enum { MAX_PENDING_REG_MR = 8, @@ -1620,6 +1621,88 @@ err_free: return ERR_PTR(err); } +struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in = NULL; + struct mlx5_ib_mw *mw = NULL; + int ndescs; + int err; + struct mlx5_ib_alloc_mw req = {}; + struct { + __u32 comp_mask; + __u32 response_length; + } resp = {}; + + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); + if (err) 
+ return ERR_PTR(err); + + if (req.comp_mask || req.reserved1 || req.reserved2) + return ERR_PTR(-EOPNOTSUPP); + + if (udata->inlen > sizeof(req) && + !ib_is_udata_cleared(udata, sizeof(req), + udata->inlen - sizeof(req))) + return ERR_PTR(-EOPNOTSUPP); + + ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); + + mw = kzalloc(sizeof(*mw), GFP_KERNEL); + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!mw || !in) { + err = -ENOMEM; + goto free; + } + + in->seg.status = MLX5_MKEY_STATUS_FREE; + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM | + MLX5_PERM_LOCAL_READ; + if (type == IB_MW_TYPE_2) + in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto free; + + mw->ibmw.rkey = mw->mmkey.key; + + resp.response_length = min(offsetof(typeof(resp), response_length) + + sizeof(resp.response_length), udata->outlen); + if (resp.response_length) { + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) { + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); + goto free; + } + } + + kfree(in); + return &mw->ibmw; + +free: + kfree(mw); + kfree(in); + return ERR_PTR(err); +} + +int mlx5_ib_dealloc_mw(struct ib_mw *mw) +{ + struct mlx5_ib_mw *mmw = to_mmw(mw); + int err; + + err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, + &mmw->mmkey); + if (!err) + kfree(mmw); + return err; +} + int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status) { diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index b94a554..61bc308 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -152,6 +152,13 @@ struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; +struct mlx5_ib_alloc_mw { + __u32 comp_mask; + __u8 num_klms; + __u8 reserved1; + __u16 reserved2; +}; + static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 72bba52..3044cfa 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -769,7 +769,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cd[0x1]; u8 reserved_at_22c[0x1]; u8 apm[0x1]; - u8 reserved_at_22e[0x7]; + u8 reserved_at_22e[0x2]; + u8 imaicl[0x1]; + u8 reserved_at_231[0x4]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; -- cgit v0.10.2 From 78a50a5e6068955494117b37b03379dacaf830b7 Mon Sep 17 00:00:00 2001 From: Hans Westgaard Ry Date: Wed, 2 Mar 2016 13:44:28 +0100 Subject: IB/ipoib: Add handling for sending of skb with many frags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IPoIB converts skb-fragments to sge adding 1 extra sge when SG is enabled. Current codepath assumes that the max number of sge a device support is at least MAX_SKB_FRAGS+1, there is no interaction with upper layers to limit number of fragments in an skb if a device suports fewer sges. The assumptions also lead to requesting a fixed number of sge when IPoIB creates queue-pairs with SG enabled. A fallback/slowpath is implemented using skb_linearize to handle cases where the conversion would result in more sges than supported. 
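Condensed sketch of the fallback added to the send paths (error and statistics accounting trimmed; see the full hunks below):

    /* The linear head of the skb, if any, consumes one sge of its own;
     * max_send_sge was capped at QP creation to what the HCA supports. */
    unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);

    if (skb_shinfo(skb)->nr_frags > usable_sge) {
            /* Slowpath: fold the fragments into the linear buffer. */
            if (skb_linearize(skb) < 0 ||
                skb_shinfo(skb)->nr_frags > usable_sge) {
                    /* Still too fragmented for this device - drop. */
                    dev_kfree_skb_any(skb);
                    return;
            }
    }

The same pattern appears in both ipoib_send() and ipoib_cm_send(), using priv->max_send_sge and tx->max_send_sge respectively.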
Signed-off-by: Hans Westgaard Ry Reviewed-by: HÃ¥kon Bugge Reviewed-by: Wei Lin Guay Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a6f3eab..85be0de 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -244,6 +244,7 @@ struct ipoib_cm_tx { unsigned tx_tail; unsigned long flags; u32 mtu; + unsigned max_send_sge; }; struct ipoib_cm_rx_buf { @@ -390,6 +391,7 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + unsigned max_send_sge; }; struct ipoib_ah { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 917e46e..c8ed535 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_tx_buf *tx_req; int rc; + unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb); if (unlikely(skb->len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN); return; } - + if (skb_shinfo(skb)->nr_frags > usable_sge) { + if (skb_linearize(skb) < 0) { + ipoib_warn(priv, "skb could not be linearized\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + /* Does skb_linearize return ok without reducing nr_frags? */ + if (skb_shinfo(skb)->nr_frags > usable_sge) { + ipoib_warn(priv, "too many frags after skb linearize\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + } ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", tx->tx_head, skb->len, tx->qp->qp_num); @@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) - attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + attr.cap.max_send_sge = + min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { @@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; tx_qp = ib_create_qp(priv->pd, &attr); } + tx->max_send_sge = attr.cap.max_send_sge; return tx_qp; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index fa9c42f..899e6b7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -538,6 +538,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; int hlen, rc; void *phead; + unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb); if (skb_is_gso(skb)) { hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); @@ -561,6 +562,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, phead = NULL; hlen = 0; } + if (skb_shinfo(skb)->nr_frags > usable_sge) { + if (skb_linearize(skb) < 0) { + ipoib_warn(priv, "skb could not be linearized\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + /* Does skb_linearize return ok without reducing nr_frags? 
*/ + if (skb_shinfo(skb)->nr_frags > usable_sge) { + ipoib_warn(priv, "too many frags after skb linearize\n"); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return; + } + } ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", skb->len, address, qpn); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index d48c5ba..b809c37 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; if (dev->features & NETIF_F_SG) - init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + init_attr.cap.max_send_sge = + min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { @@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->rx_wr.next = NULL; priv->rx_wr.sg_list = priv->rx_sge; + priv->max_send_sge = init_attr.cap.max_send_sge; + return 0; out_free_send_cq: -- cgit v0.10.2 From 85d9691ccc96d95629939a877fd6c1f8c4724f56 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 14 Feb 2016 18:35:51 +0200 Subject: IB/mlx5: Avoid using user-index for SRQs Normal SRQs, unlike XRC SRQs, don't have user-index, therefore avoid verifying it and using it. Fixes: cfb5e088e26a ('IB/mlx5: Add CQE version 1 support to user QPs and SRQs') Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4659256..a1b7122 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -75,7 +75,8 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, struct mlx5_create_srq_mbox_in **in, - struct ib_udata *udata, int buf_size, int *inlen) + struct ib_udata *udata, int buf_size, int *inlen, + int is_xrc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd = {}; @@ -108,10 +109,12 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, drv_data - sizeof(ucmd))) return -EINVAL; - err = get_srq_user_index(to_mucontext(pd->uobject->context), - &ucmd, udata->inlen, &uidx); - if (err) - return err; + if (is_xrc) { + err = get_srq_user_index(to_mucontext(pd->uobject->context), + &ucmd, udata->inlen, &uidx); + if (err) + return err; + } srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); @@ -151,7 +154,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); - if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) { + if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && + is_xrc){ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, xrc_srq_context_entry); MLX5_SET(xrc_srqc, xsrqc, user_index, uidx); @@ -170,7 +174,7 @@ err_umem: static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, struct mlx5_create_srq_mbox_in **in, int buf_size, - int *inlen) + int *inlen, int is_xrc) { int err; int i; @@ -224,7 +228,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - if (MLX5_CAP_GEN(dev->mdev, cqe_version) == 
MLX5_CQE_VERSION_V1) { + if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && + is_xrc){ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, xrc_srq_context_entry); /* 0xffffff means we ask to work with cqe version 0 */ @@ -302,10 +307,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); + is_xrc = (init_attr->srq_type == IB_SRQT_XRC); + if (pd->uobject) - err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen); + err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen, + is_xrc); else - err = create_srq_kernel(dev, srq, &in, buf_size, &inlen); + err = create_srq_kernel(dev, srq, &in, buf_size, &inlen, + is_xrc); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", @@ -313,7 +322,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - is_xrc = (init_attr->srq_type == IB_SRQT_XRC); in->ctx.state_log_sz = ilog2(srq->msrq.max); flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; xrcdn = 0; -- cgit v0.10.2 From 3d943c9d1cc5ad1825e46291ef5ce627e1b6b660 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 14 Feb 2016 18:35:52 +0200 Subject: IB/{core, mlx5}: Fix input len in vendor part of create_qp/srq Currently, the inlen field of the vendor's part of the command doesn't match the command buffer. This happens because the inlen accommodates ib_uverbs_cmd_hdr which is deducted from the in buffer. This is problematic since the vendor function could be called either from the legacy verb (where the input length mismatches the actual length) or by the extended verb (where the length matches). The vendor has no idea which function calls it and therefore has no way to know how the length variable should be treated. Fixing this by aligning the inlen to the correct length. All vendor drivers either assumed that inlen >= sizeof(vendor_uhw_cmd) or just failed wrongly (mlx5) and fixed in this patch. 
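The arithmetic of the fix, paraphrasing the uverbs hunks below (legacy create_srq shown): the write() command header has already been consumed before the vendor driver sees the buffer, so it must be subtracted from the length handed down.

    /* legacy entry point: strip the already-parsed header from inlen */
    INIT_UDATA(&udata, buf + sizeof(cmd),
               (unsigned long)cmd.response + sizeof(resp),
               in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
               out_len - sizeof(resp));

    /* vendor side (mlx5 create_srq_user) can then clamp directly,
     * with no knowledge of which entry point was used: */
    ucmdlen = min(udata->inlen, sizeof(ucmd));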
Fixes: cfb5e088e26a ('IB/mlx5: Add CQE version 1 support to user QPs and SRQs') Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6ffc9c4..6c6fbff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1970,7 +1970,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, resp_size); INIT_UDATA(&uhw, buf + sizeof(cmd), (unsigned long)cmd.response + resp_size, - in_len - sizeof(cmd), out_len - resp_size); + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - resp_size); memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -3413,7 +3414,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof resp); ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); if (ret) @@ -3439,7 +3441,8 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof resp); ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); if (ret) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index a1b7122..3b2ddd6 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -88,13 +88,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, int ncont; u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; - int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); - if (drv_data < 0) - return -EINVAL; - - ucmdlen = (drv_data < sizeof(ucmd)) ? - drv_data : sizeof(ucmd); + ucmdlen = min(udata->inlen, sizeof(ucmd)); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { mlx5_ib_dbg(dev, "failed copy udata\n"); @@ -104,9 +99,9 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, if (ucmd.reserved0 || ucmd.reserved1) return -EINVAL; - if (drv_data > sizeof(ucmd) && + if (udata->inlen > sizeof(ucmd) && !ib_is_udata_cleared(udata, sizeof(ucmd), - drv_data - sizeof(ucmd))) + udata->inlen - sizeof(ucmd))) return -EINVAL; if (is_xrc) { -- cgit v0.10.2 From 74a0b0a5ea6b48f8945e6e900e9c678866aa7520 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Feb 2016 17:07:47 +0200 Subject: IB/core: Avoid duplicate code Move the check on the validity of the command to a common area. 
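The duplicated block in both the legacy and extended branches boils down to decoding the command word once; roughly, as hoisted in the hunk below (the real code jumps to the common error exit rather than returning):

    /* hdr.command packs the opcode and the flag bits; reject unknown
     * bits, then decode both fields once for either dispatch path. */
    if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
                               IB_USER_VERBS_CMD_COMMAND_MASK))
            return -EINVAL;

    command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
    flags   = (hdr.command & IB_USER_VERBS_CMD_FLAGS_MASK) >>
              IB_USER_VERBS_CMD_FLAGS_SHIFT;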
Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 39680ae..08f1a7b 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -689,6 +689,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, struct ib_uverbs_file *file = filp->private_data; struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; + __u32 command; __u32 flags; int srcu_key; ssize_t ret; @@ -707,20 +708,18 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } + if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | + IB_USER_VERBS_CMD_COMMAND_MASK)) { + ret = -EINVAL; + goto out; + } + + command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + flags = (hdr.command & IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; if (!flags) { - __u32 command; - - if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) { - ret = -EINVAL; - goto out; - } - - command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; - if (command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[command]) { ret = -EINVAL; @@ -749,21 +748,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, hdr.out_words * 4); } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { - __u32 command; - struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_udata ucore; struct ib_udata uhw; size_t written_count = count; - if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) { - ret = -EINVAL; - goto out; - } - - command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; - if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || !uverbs_ex_cmd_table[command]) { ret = -ENOSYS; -- cgit v0.10.2 From 2dbd5186a39c7ef0d92045c43b4857c23cb117ab Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Feb 2016 17:07:48 +0200 Subject: IB/core: IB/core: Allow legacy verbs through extended interfaces When an extended verb is an extension to a legacy verb, the original functionality is preserved. Hence we do not require each hardware driver to set the extended capability. This will allow the use of the extended verb in its simple form with drivers that do not support the extended capability. 
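The resulting capability check is a compact restatement of the helper added below: command numbers up to IB_USER_VERBS_CMD_OPEN_QP are legacy verbs and are validated against the legacy mask even when they arrive through the extended framing, so a driver only has to advertise them once.

    static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
    {
            /* Legacy verbs keep using uverbs_cmd_mask; only genuinely
             * extended verbs require the uverbs_ex_cmd_mask bit. */
            u64 mask = (command <= IB_USER_VERBS_CMD_OPEN_QP) ?
                       ib_dev->uverbs_cmd_mask : ib_dev->uverbs_ex_cmd_mask;

            return (mask & (1ULL << command)) ? 0 : -1;
    }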
Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 08f1a7b..20f0049 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -683,6 +683,21 @@ out: return ev_file; } +static int verify_command_mask(struct ib_device *ib_dev, __u32 command) +{ + u64 mask; + + if (command <= IB_USER_VERBS_CMD_OPEN_QP) + mask = ib_dev->uverbs_cmd_mask; + else + mask = ib_dev->uverbs_ex_cmd_mask; + + if (mask & ((u64)1 << command)) + return 0; + + return -1; +} + static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { @@ -715,6 +730,10 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, } command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + if (verify_command_mask(ib_dev, command)) { + ret = -EOPNOTSUPP; + goto out; + } flags = (hdr.command & IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; @@ -732,11 +751,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } - if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) { - ret = -ENOSYS; - goto out; - } - if (hdr.in_words * 4 != count) { ret = -EINVAL; goto out; @@ -764,11 +778,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } - if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) { - ret = -ENOSYS; - goto out; - } - if (count < (sizeof(hdr) + sizeof(ex_hdr))) { ret = -EINVAL; goto out; -- cgit v0.10.2 From eaebc7d21e102d86b389c77686c7e6258dd6d479 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 14 Feb 2016 17:07:49 +0200 Subject: IB/core: Modify conditional on ucontext existence Since we allow to call legacy verbs using their extended counterpart, the check on ucontext has to move up to a common area in case this verb is ever extended. Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 20f0049..8b299df 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -735,6 +735,12 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } + if (!file->ucontext && + command != IB_USER_VERBS_CMD_GET_CONTEXT) { + ret = -EINVAL; + goto out; + } + flags = (hdr.command & IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; @@ -745,12 +751,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } - if (!file->ucontext && - command != IB_USER_VERBS_CMD_GET_CONTEXT) { - ret = -EINVAL; - goto out; - } - if (hdr.in_words * 4 != count) { ret = -EINVAL; goto out; -- cgit v0.10.2 From db9314cd351ebd734970c7afc2995a743e34aa02 Mon Sep 17 00:00:00 2001 From: Amitoj Kaur Chawla Date: Fri, 12 Feb 2016 13:16:10 +0530 Subject: IB/core: Replace memset with eth_zero_addr Use eth_zero_addr to assign the zero address to the given address array instead of memset when second argument is address of zero. 
The Coccinelle semantic patch used to make this change is as follows: // @eth_zero_addr@ expression e; @@ -memset(e,0x00,ETH_ALEN); +eth_zero_addr(e); // Signed-off-by: Amitoj Kaur Chawla Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index f334090..4b3ac76 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1221,7 +1221,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, rec.net = NULL; rec.ifindex = 0; rec.gid_type = IB_GID_TYPE_IB; - memset(rec.dmac, 0, ETH_ALEN); + eth_zero_addr(rec.dmac); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); -- cgit v0.10.2 From aba25a3e96563083389fc7b9f7d35be6bdfe6579 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 2 Mar 2016 00:50:29 +0530 Subject: IB/core: trivial prink cleanup. 1. Replaced printk with appropriate pr_warn, pr_err, pr_info. 2. Removed unnecessary prints around memory allocation failure which are not required, as reported by the checkpatch script. Signed-off-by: Parav Pandit Reviewed-by: Haggai Eran Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 53343ff..cb00d59 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1043,8 +1043,8 @@ static void ib_cache_update(struct ib_device *device, ret = ib_query_port(device, port, tprops); if (ret) { - printk(KERN_WARNING "ib_query_port failed (%d) for %s\n", - ret, device->name); + pr_warn("ib_query_port failed (%d) for %s\n", + ret, device->name); goto err; } @@ -1067,8 +1067,8 @@ static void ib_cache_update(struct ib_device *device, for (i = 0; i < pkey_cache->table_len; ++i) { ret = ib_query_pkey(device, port, i, pkey_cache->table + i); if (ret) { - printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n", - ret, device->name, i); + pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n", + ret, device->name, i); goto err; } } @@ -1078,8 +1078,8 @@ static void ib_cache_update(struct ib_device *device, ret = ib_query_gid(device, port, i, gid_cache->table + i, NULL); if (ret) { - printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", - ret, device->name, i); + pr_warn("ib_query_gid failed (%d) for %s (index %d)\n", + ret, device->name, i); goto err; } } @@ -1161,8 +1161,7 @@ int ib_cache_setup_one(struct ib_device *device) GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.lmc_cache) { - printk(KERN_WARNING "Couldn't allocate cache " - "for %s\n", device->name); + pr_warn("Couldn't allocate cache for %s\n", device->name); return -ENOMEM; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 9729639..e13121f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1713,7 +1713,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: - printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", + pr_err("RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } @@ -2186,8 +2186,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(id, id_priv->backlog); if (ret) - printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, " - "listening on device %s\n", ret, cma_dev->device->name); + pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 
+ ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) @@ -3239,7 +3239,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = 0; break; default: - printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", + pr_err("RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } @@ -4003,8 +4003,8 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id if ((dev_addr->bound_dev_if == ndev->ifindex) && (net_eq(dev_net(ndev), dev_addr->net)) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { - printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", - ndev->name, &id_priv->id); + pr_info("RDMA CM addr change for ndev %s used by id %p\n", + ndev->name, &id_priv->id); work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; @@ -4287,7 +4287,7 @@ static int __init cma_init(void) goto err; if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table)) - printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n"); + pr_warn("RDMA CMA: failed to add netlink callback\n"); cma_configfs_init(); return 0; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 94b80a5..270c7ff 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -115,8 +115,8 @@ static int ib_device_check_mandatory(struct ib_device *device) for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { if (!*(void **) ((void *) device + mandatory_table[i].offset)) { - printk(KERN_WARNING "Device %s is missing mandatory function %s\n", - device->name, mandatory_table[i].name); + pr_warn("Device %s is missing mandatory function %s\n", + device->name, mandatory_table[i].name); return -EINVAL; } } @@ -255,8 +255,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) { - printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n", - device->name, client->name); + pr_warn("Couldn't allocate client context for %s/%s\n", + device->name, client->name); return -ENOMEM; } @@ -343,29 +343,29 @@ int ib_register_device(struct ib_device *device, ret = read_port_immutable(device); if (ret) { - printk(KERN_WARNING "Couldn't create per port immutable data %s\n", - device->name); + pr_warn("Couldn't create per port immutable data %s\n", + device->name); goto out; } ret = ib_cache_setup_one(device); if (ret) { - printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); + pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); goto out; } memset(&device->attrs, 0, sizeof(device->attrs)); ret = device->query_device(device, &device->attrs, &uhw); if (ret) { - printk(KERN_WARNING "Couldn't query the device attributes\n"); + pr_warn("Couldn't query the device attributes\n"); ib_cache_cleanup_one(device); goto out; } ret = ib_device_register_sysfs(device, port_callback); if (ret) { - printk(KERN_WARNING "Couldn't register device %s with driver model\n", - device->name); + pr_warn("Couldn't register device %s with driver model\n", + device->name); ib_cache_cleanup_one(device); goto out; } @@ -566,8 +566,8 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, goto out; } - printk(KERN_WARNING "No client context found for %s/%s\n", - device->name, client->name); + pr_warn("No client context found for %s/%s\n", + device->name, client->name); out: spin_unlock_irqrestore(&device->client_data_lock, flags); @@ -960,13 +960,13 
@@ static int __init ib_core_init(void) ret = class_register(&ib_class); if (ret) { - printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); + pr_warn("Couldn't create InfiniBand device class\n"); goto err_comp; } ret = ibnl_init(); if (ret) { - printk(KERN_WARNING "Couldn't init IB netlink interface\n"); + pr_warn("Couldn't init IB netlink interface\n"); goto err_sysfs; } diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 6ac3683..cdbb1f1 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -150,8 +150,8 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) #ifdef DEBUG if (fmr->ref_count !=0) { - printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n", - fmr, fmr->ref_count); + pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n", + fmr, fmr->ref_count); } #endif } @@ -167,7 +167,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) ret = ib_unmap_fmr(&fmr_list); if (ret) - printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret); + pr_warn(PFX "ib_unmap_fmr returned %d\n", ret); spin_lock_irq(&pool->pool_lock); list_splice(&unmap_list, &pool->free_list); @@ -222,8 +222,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, device = pd->device; if (!device->alloc_fmr || !device->dealloc_fmr || !device->map_phys_fmr || !device->unmap_fmr) { - printk(KERN_INFO PFX "Device %s does not support FMRs\n", - device->name); + pr_info(PFX "Device %s does not support FMRs\n", device->name); return ERR_PTR(-ENOSYS); } @@ -233,13 +232,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, max_remaps = device->attrs.max_map_per_fmr; pool = kmalloc(sizeof *pool, GFP_KERNEL); - if (!pool) { - printk(KERN_WARNING PFX "couldn't allocate pool struct\n"); + if (!pool) return ERR_PTR(-ENOMEM); - } pool->cache_bucket = NULL; - pool->flush_function = params->flush_function; pool->flush_arg = params->flush_arg; @@ -251,7 +247,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, GFP_KERNEL); if (!pool->cache_bucket) { - printk(KERN_WARNING PFX "Failed to allocate cache in pool\n"); + pr_warn(PFX "Failed to allocate cache in pool\n"); ret = -ENOMEM; goto out_free_pool; } @@ -275,7 +271,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, "ib_fmr(%s)", device->name); if (IS_ERR(pool->thread)) { - printk(KERN_WARNING PFX "couldn't start cleanup thread\n"); + pr_warn(PFX "couldn't start cleanup thread\n"); ret = PTR_ERR(pool->thread); goto out_free_pool; } @@ -294,11 +290,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, for (i = 0; i < params->pool_size; ++i) { fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); - if (!fmr) { - printk(KERN_WARNING PFX "failed to allocate fmr " - "struct for FMR %d\n", i); + if (!fmr) goto out_fail; - } fmr->pool = pool; fmr->remap_count = 0; @@ -307,8 +300,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); if (IS_ERR(fmr->fmr)) { - printk(KERN_WARNING PFX "fmr_create failed " - "for FMR %d\n", i); + pr_warn(PFX "fmr_create failed for FMR %d\n", + i); kfree(fmr); goto out_fail; } @@ -363,8 +356,8 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) } if (i < pool->pool_size) - printk(KERN_WARNING PFX "pool still has %d regions registered\n", - pool->pool_size - i); + pr_warn(PFX "pool still has %d regions registered\n", + pool->pool_size - i); kfree(pool->cache_bucket); kfree(pool); @@ -463,7 +456,7 @@ 
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, list_add(&fmr->list, &pool->free_list); spin_unlock_irqrestore(&pool->pool_lock, flags); - printk(KERN_WARNING PFX "fmr_map returns %d\n", result); + pr_warn(PFX "fmr_map returns %d\n", result); return ERR_PTR(result); } @@ -517,8 +510,8 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) #ifdef DEBUG if (fmr->ref_count < 0) - printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n", - fmr, fmr->ref_count); + pr_warn(PFX "FMR %p has ref count %d < 0\n", + fmr, fmr->ref_count); #endif spin_unlock_irqrestore(&pool->pool_lock, flags); diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index 1b65986..19b1ee3 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -44,7 +44,7 @@ static u64 value_read(int offset, int size, void *structure) case 4: return be32_to_cpup((__be32 *) (structure + offset)); case 8: return be64_to_cpup((__be64 *) (structure + offset)); default: - printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); + pr_warn("Field size %d bits not handled\n", size * 8); return 0; } } @@ -104,9 +104,8 @@ void ib_pack(const struct ib_field *desc, } else { if (desc[i].offset_bits % 8 || desc[i].size_bits % 8) { - printk(KERN_WARNING "Structure field %s of size %d " - "bits is not byte-aligned\n", - desc[i].field_name, desc[i].size_bits); + pr_warn("Structure field %s of size %d bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits); } if (desc[i].struct_size_bytes) @@ -132,7 +131,7 @@ static void value_write(int offset, int size, u64 val, void *structure) case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break; case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break; default: - printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); + pr_warn("Field size %d bits not handled\n", size * 8); } } @@ -188,9 +187,8 @@ void ib_unpack(const struct ib_field *desc, } else { if (desc[i].offset_bits % 8 || desc[i].size_bits % 8) { - printk(KERN_WARNING "Structure field %s of size %d " - "bits is not byte-aligned\n", - desc[i].field_name, desc[i].size_bits); + pr_warn("Structure field %s of size %d bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits); } memcpy(structure + desc[i].struct_offset_bytes, diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 4b3ac76..8e3bf6c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -864,13 +864,12 @@ static void update_sm_ah(struct work_struct *work) struct ib_ah_attr ah_attr; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { - printk(KERN_WARNING "Couldn't query port\n"); + pr_warn("Couldn't query port\n"); return; } new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); if (!new_ah) { - printk(KERN_WARNING "Couldn't allocate new SM AH\n"); return; } @@ -880,7 +879,7 @@ static void update_sm_ah(struct work_struct *work) new_ah->pkey_index = 0; if (ib_find_pkey(port->agent->device, port->port_num, IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) - printk(KERN_ERR "Couldn't find index for default PKey\n"); + pr_err("Couldn't find index for default PKey\n"); memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = port_attr.sm_lid; @@ -889,7 +888,7 @@ static void update_sm_ah(struct work_struct *work) new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); if (IS_ERR(new_ah->ah)) { - printk(KERN_WARNING "Couldn't create new SM AH\n"); + pr_warn("Couldn't create new SM AH\n"); 
kfree(new_ah); return; } @@ -1800,13 +1799,13 @@ static int __init ib_sa_init(void) ret = ib_register_client(&sa_client); if (ret) { - printk(KERN_ERR "Couldn't register ib_sa client\n"); + pr_err("Couldn't register ib_sa client\n"); goto err1; } ret = mcast_init(); if (ret) { - printk(KERN_ERR "Couldn't initialize multicast handling\n"); + pr_err("Couldn't initialize multicast handling\n"); goto err2; } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 6b4e8a0..4a9aa04 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1234,7 +1234,7 @@ static int find_overflow_devnum(void) ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { - printk(KERN_ERR "ucm: couldn't register dynamic device number\n"); + pr_err("ucm: couldn't register dynamic device number\n"); return ret; } } @@ -1329,19 +1329,19 @@ static int __init ib_ucm_init(void) ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { - printk(KERN_ERR "ucm: couldn't register device number\n"); + pr_err("ucm: couldn't register device number\n"); goto error1; } ret = class_create_file(&cm_class, &class_attr_abi_version.attr); if (ret) { - printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); + pr_err("ucm: couldn't create abi_version attribute\n"); goto error2; } ret = ib_register_client(&ucm_client); if (ret) { - printk(KERN_ERR "ucm: couldn't register client\n"); + pr_err("ucm: couldn't register client\n"); goto error3; } return 0; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 8b5a934..dd3bcce 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -314,7 +314,7 @@ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) } } if (!event_found) - printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n"); + pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n"); } static int ucma_event_handler(struct rdma_cm_id *cm_id, @@ -1716,13 +1716,13 @@ static int __init ucma_init(void) ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); if (ret) { - printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); + pr_err("rdma_ucm: couldn't create abi_version attr\n"); goto err1; } ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table); if (!ucma_ctl_table_hdr) { - printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n"); + pr_err("rdma_ucm: couldn't register sysctl paths\n"); ret = -ENOMEM; goto err2; } diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 2116132..29a45d2 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -479,8 +479,8 @@ int ib_ud_header_unpack(void *buf, buf += IB_LRH_BYTES; if (header->lrh.link_version != 0) { - printk(KERN_WARNING "Invalid LRH.link_version %d\n", - header->lrh.link_version); + pr_warn("Invalid LRH.link_version %d\n", + header->lrh.link_version); return -EINVAL; } @@ -496,20 +496,20 @@ int ib_ud_header_unpack(void *buf, buf += IB_GRH_BYTES; if (header->grh.ip_version != 6) { - printk(KERN_WARNING "Invalid GRH.ip_version %d\n", - header->grh.ip_version); + pr_warn("Invalid GRH.ip_version %d\n", + header->grh.ip_version); return -EINVAL; } if (header->grh.next_header != 0x1b) { - printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n", - header->grh.next_header); + pr_warn("Invalid GRH.next_header 
0x%02x\n", + header->grh.next_header); return -EINVAL; } break; default: - printk(KERN_WARNING "Invalid LRH.link_next_header %d\n", - header->lrh.link_next_header); + pr_warn("Invalid LRH.link_next_header %d\n", + header->lrh.link_next_header); return -EINVAL; } @@ -525,14 +525,13 @@ int ib_ud_header_unpack(void *buf, header->immediate_present = 1; break; default: - printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n", - header->bth.opcode); + pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode); return -EINVAL; } if (header->bth.transport_header_version != 0) { - printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n", - header->bth.transport_header_version); + pr_warn("Invalid BTH.transport_header_version %d\n", + header->bth.transport_header_version); return -EINVAL; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 8b299df..28ba2cc 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1056,7 +1056,7 @@ static int find_overflow_devnum(void) ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { - printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n"); + pr_err("user_verbs: couldn't register dynamic device number\n"); return ret; } } @@ -1277,14 +1277,14 @@ static int __init ib_uverbs_init(void) ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { - printk(KERN_ERR "user_verbs: couldn't register device number\n"); + pr_err("user_verbs: couldn't register device number\n"); goto out; } uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); if (IS_ERR(uverbs_class)) { ret = PTR_ERR(uverbs_class); - printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); + pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } @@ -1292,13 +1292,13 @@ static int __init ib_uverbs_init(void) ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); if (ret) { - printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); + pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&uverbs_client); if (ret) { - printk(KERN_ERR "user_verbs: couldn't register client\n"); + pr_err("user_verbs: couldn't register client\n"); goto out_class; } -- cgit v0.10.2 From 5a30247bf09eeffaa46c00d59a62359aeb7d0462 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Tue, 5 Jan 2016 13:52:55 -0500 Subject: IB/core: Documentation fix in the MAD header file In ib_mad.h, ib_mad_snoop_handler uses send_buf rather than send_wr Signed-off-by: Hal Rosenstock Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 0ff049b..37dd534c 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -424,11 +424,11 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, /** * ib_mad_snoop_handler - Callback handler for snooping sent MADs. * @mad_agent: MAD agent that snooped the MAD. - * @send_wr: Work request information on the sent MAD. + * @send_buf: send MAD data buffer. * @mad_send_wc: Work completion information on the sent MAD. Valid * only for snooping that occurs on a send completion. * - * Clients snooping MADs should not modify data referenced by the @send_wr + * Clients snooping MADs should not modify data referenced by the @send_buf * or @mad_send_wc. 
*/ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, -- cgit v0.10.2 From 0ca4c39f32cd3fad57c18cd8df49d6b4e7bc2411 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 26 Dec 2015 18:18:18 +0100 Subject: IB/ocrdma: Delete unnecessary variable initialisations in 11 functions The variable "status" will be set to an appropriate value a bit later. Thus let us omit the explicit initialisation at the beginning. Signed-off-by: Markus Elfring Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index e3c4f17..797362a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -74,7 +74,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, union ib_gid *sgid, int pdid, bool *isvlan, u16 vlan_tag) { - int status = 0; + int status; struct ocrdma_eth_vlan eth; struct ocrdma_grh grh; int eth_sz; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 2cfbf15..9b8ff26 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1113,7 +1113,7 @@ mbx_err: static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe, void *payload_va) { - int status = 0; + int status; struct ocrdma_mbx_rsp *rsp = payload_va; if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> @@ -2893,7 +2893,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, struct ocrdma_dcbx_cfg *dcbxcfg) { - int status = 0; + int status; dma_addr_t pa; struct ocrdma_mqe cmd; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 255f774..8bef09a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -610,7 +610,7 @@ static char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev) static void ocrdma_update_stats(struct ocrdma_dev *dev) { ulong now = jiffies, secs; - int status = 0; + int status; struct ocrdma_rdma_stats_resp *rdma_stats = (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats; @@ -641,7 +641,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp, { char tmp_str[32]; long reset; - int status = 0; + int status; struct ocrdma_stats *pstats = filp->private_data; struct ocrdma_dev *dev = pstats->dev; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 4df3f13..4a4c8d6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -419,7 +419,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ib_udata *udata) { struct ocrdma_pd *pd = NULL; - int status = 0; + int status; pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) @@ -468,7 +468,7 @@ static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) { - int status = 0; + int status; if (dev->pd_mgr->pd_prealloc_valid) status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); @@ -596,7 +596,7 @@ map_err: int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) { - int status = 0; + int status; struct ocrdma_mm *mm, *tmp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); struct ocrdma_dev *dev = 
get_ocrdma_dev(ibctx->device); @@ -623,7 +623,7 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; u64 unmapped_db = (u64) dev->nic_info.unmapped_db; unsigned long len = (vma->vm_end - vma->vm_start); - int status = 0; + int status; bool found; if (vma->vm_start & (PAGE_SIZE - 1)) @@ -1285,7 +1285,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, struct ib_udata *udata, int dpp_offset, int dpp_credit_lmt, int srq) { - int status = 0; + int status; u64 usr_db; struct ocrdma_create_qp_uresp uresp; struct ocrdma_pd *pd = qp->pd; @@ -1949,7 +1949,7 @@ int ocrdma_modify_srq(struct ib_srq *ibsrq, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata) { - int status = 0; + int status; struct ocrdma_srq *srq; srq = get_ocrdma_srq(ibsrq); -- cgit v0.10.2 From d1c95b0e6526fc2a1841cc0b6b3bcb46c31cc038 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 26 Dec 2015 18:28:35 +0100 Subject: IB/ocrdma: Skip using unneeded intermediate variable Return zero at the end without using the local variable "status". Signed-off-by: Markus Elfring Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 9b8ff26..16740dc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2141,7 +2141,6 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, enum ib_qp_state *old_ib_state) { unsigned long flags; - int status = 0; enum ocrdma_qp_state new_state; new_state = get_ocrdma_qp_state(new_ib_state); @@ -2166,7 +2165,7 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, qp->state = new_state; spin_unlock_irqrestore(&qp->q_lock, flags); - return status; + return 0; } static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp) -- cgit v0.10.2 From 95f60bb8118c1fc368d7414409d555f050aea7f2 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 26 Dec 2015 18:40:43 +0100 Subject: IB/ocrdma: Skip using unneeded intermediate variable Return the value from a call of the ocrdma_mbx_modify_qp() function without using an extra assignment for the local variable "status". Signed-off-by: Markus Elfring Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 4a4c8d6..a8496a1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1494,9 +1494,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, */ if (status < 0) return status; - status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); - - return status; + return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); } int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, -- cgit v0.10.2 From add08d765e942eab8eb15a592baeb372a3dd6831 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Mar 2016 09:38:22 +0100 Subject: IB/mlx5: Convert UMR CQ to new CQ API Simplifies the code, and makes it more fair vs other users by using a softirq for polling. 
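For reference, a minimal sketch of the completion flow the new CQ API expects; the context struct and handler names below are hypothetical stand-ins, not the driver's own symbols. A request embeds a struct ib_cqe, points wr.wr_cqe at it, and the done callback recovers the request with container_of(); the CQ itself is allocated with ib_alloc_cq() and torn down with ib_free_cq().

#include <rdma/ib_verbs.h>
#include <linux/completion.h>

struct example_umr_context {			/* hypothetical request context */
	struct ib_cqe		cqe;
	enum ib_wc_status	status;
	struct completion	done;
};

static void example_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct example_umr_context *ctx =
		container_of(wc->wr_cqe, struct example_umr_context, cqe);

	ctx->status = wc->status;		/* record status for the waiter */
	complete(&ctx->done);			/* wake the posting thread */
}

/*
 * Setup:    cq = ib_alloc_cq(ibdev, NULL, 128, 0, IB_POLL_SOFTIRQ);
 *           ctx.cqe.done = example_umr_done;  wr.wr_cqe = &ctx.cqe;
 * Teardown: ib_free_cq(cq);
 */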
Signed-off-by: Christoph Hellwig Reviewed-by: Haggai Eran Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4d9b7cc..63c3d21 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1861,7 +1861,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); mlx5_ib_destroy_qp(dev->umrc.qp); - ib_destroy_cq(dev->umrc.cq); + ib_free_cq(dev->umrc.cq); ib_dealloc_pd(dev->umrc.pd); } @@ -1876,7 +1876,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev) struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; - struct ib_cq_init_attr cq_attr = {}; int ret; attr = kzalloc(sizeof(*attr), GFP_KERNEL); @@ -1893,15 +1892,12 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_0; } - cq_attr.cqe = 128; - cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, - &cq_attr); + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); goto error_2; } - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); init_attr->send_cq = cq; init_attr->recv_cq = cq; @@ -1968,7 +1964,7 @@ error_4: mlx5_ib_destroy_qp(qp); error_3: - ib_destroy_cq(cq); + ib_free_cq(cq); error_2: ib_dealloc_pd(pd); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 648d2e2..3c02b3c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -468,16 +468,11 @@ struct mlx5_ib_mw { }; struct mlx5_ib_umr_context { + struct ib_cqe cqe; enum ib_wc_status status; struct completion done; }; -static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) -{ - context->status = -1; - init_completion(&context->done); -} - struct umr_common { struct ib_pd *pd; struct ib_cq *cq; @@ -762,7 +757,6 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); -void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 70a047d..dd92314 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -836,26 +836,20 @@ static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, return umem; } -void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) +static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) { - struct mlx5_ib_umr_context *context; - struct ib_wc wc; - int err; + struct mlx5_ib_umr_context *context = + container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); - while (1) { - err = ib_poll_cq(cq, 1, &wc); - if (err < 0) { - pr_warn("poll cq error %d\n", err); - return; - } - if (err == 0) - break; + context->status = wc->status; + complete(&context->done); +} - context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; - context->status = wc.status; - complete(&context->done); - } - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); +static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) +{ + context->cqe.done = mlx5_ib_umr_done; + context->status = -1; + init_completion(&context->done); } static struct mlx5_ib_mr 
*reg_umr(struct ib_pd *pd, struct ib_umem *umem, @@ -896,12 +890,13 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, if (err) goto free_mr; + mlx5_ib_init_umr_context(&umr_context); + memset(&umrwr, 0, sizeof(umrwr)); - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + umrwr.wr.wr_cqe = &umr_context.cqe; prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift, virt_addr, len, access_flags); - mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { @@ -1013,8 +1008,10 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + mlx5_ib_init_umr_context(&umr_context); + memset(&wr, 0, sizeof(wr)); - wr.wr.wr_id = (u64)(unsigned long)&umr_context; + wr.wr.wr_cqe = &umr_context.cqe; sg.addr = dma; sg.length = ALIGN(npages * sizeof(u64), @@ -1031,7 +1028,6 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, wr.mkey = mr->mmkey.key; wr.target.offset = start_page_index; - mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); err = ib_post_send(umrc->qp, &wr.wr, &bad); if (err) { @@ -1204,11 +1200,12 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) struct ib_send_wr *bad; int err; + mlx5_ib_init_umr_context(&umr_context); + memset(&umrwr.wr, 0, sizeof(umrwr)); - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + umrwr.wr.wr_cqe = &umr_context.cqe; prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); - mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { @@ -1246,7 +1243,9 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, int size; int err; - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + mlx5_ib_init_umr_context(&umr_context); + + umrwr.wr.wr_cqe = &umr_context.cqe; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; if (flags & IB_MR_REREG_TRANS) { @@ -1273,8 +1272,6 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS; } - mlx5_ib_init_umr_context(&umr_context); - /* post send request to UMR QP */ down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); -- cgit v0.10.2 From 0025b0bdeae7c13b8ab1dce64b0108ed9c071e2e Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 3 Mar 2016 11:23:37 -0500 Subject: IB/mlx5: Make coding style more consistent These three related functions can't agree whether to put the umrwr on the stack dirty and then memset it, or to initialize it on the stack. Make them all agree. 
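For reference, the two stack-initialization idioms being unified, shown with a generic stand-in struct (not a real kernel type):

#include <linux/string.h>

struct example { int a; void *b; };		/* stand-in, not a kernel type */

static void idiom_memset(void)
{
	struct example e;			/* declared dirty on the stack ... */

	memset(&e, 0, sizeof(e));		/* ... then zeroed explicitly */
}

static void idiom_initializer(void)
{
	struct example e = {};			/* zero-initialized at declaration */

	(void)e;
}

The patch settles on the second form for the UMR work requests.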
Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index dd92314..628f4350 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -860,7 +860,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct mlx5_umr_wr umrwr; + struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; @@ -892,7 +892,6 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mlx5_ib_init_umr_context(&umr_context); - memset(&umrwr, 0, sizeof(umrwr)); umrwr.wr.wr_cqe = &umr_context.cqe; prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift, virt_addr, len, access_flags); @@ -1196,13 +1195,12 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct mlx5_umr_wr umrwr; + struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; int err; mlx5_ib_init_umr_context(&umr_context); - memset(&umrwr.wr, 0, sizeof(umrwr)); umrwr.wr.wr_cqe = &umr_context.cqe; prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); -- cgit v0.10.2 From 911f4331bc87f4589b9096f4fb24b335d4c2967d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 3 Mar 2016 13:37:51 +0200 Subject: IB/mlx5: Expose correct max_fast_reg_page_list_len While documentation indicates that the number of translation entries per memory key is unlimited, in practice, we can only fit a finite amount of translation entries in a single registration wqe (which is log_max_klm_list_size). Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 63c3d21..55fa588 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -539,7 +539,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; - props->max_fast_reg_page_list_len = (unsigned int)-1; + props->max_fast_reg_page_list_len = + 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); get_atomic_caps(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); -- cgit v0.10.2 From f5aa9159a418726d74b67c8815ffd2739afb4c7a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 29 Feb 2016 19:07:32 +0200 Subject: IB/core: Add arbitrary sg_list support Devices that are capable in registering SG lists with gaps can now expose it in the core to ULPs using a new device capability IB_DEVICE_SG_GAPS_REG (in a new field device_cap_flags_ex in the device attributes as we ran out of bits), and a new mr_type IB_MR_TYPE_SG_GAPS_REG which allocates a memory region which is capable of handling SG lists with gaps. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5af6d02..16f3fb1 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1567,6 +1567,8 @@ EXPORT_SYMBOL(ib_check_mr_status); * - The last sg element is allowed to have length less than page_size. * - If sg_nents total byte length exceeds the mr max_num_sge * page_size * then only max_num_sg entries will be mapped. 
+ * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these + * constraints holds and the page_size argument is ignored. * * Returns the number of sg elements that were mapped to the memory region. * diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3f79070..bcd5b24 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -212,6 +212,7 @@ enum ib_device_cap_flags { IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), + IB_DEVICE_SG_GAPS_REG = (1ULL << 32), }; enum ib_signature_prot_cap { @@ -662,10 +663,15 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); * @IB_MR_TYPE_SIGNATURE: memory region that is used for * signature operations (data-integrity * capable regions) + * @IB_MR_TYPE_SG_GAPS: memory region that is capable to + * register any arbitrary sg lists (without + * the normal mr constraints - see + * ib_map_mr_sg) */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, IB_MR_TYPE_SIGNATURE, + IB_MR_TYPE_SG_GAPS, }; /** -- cgit v0.10.2 From b005d316471374b1ff26df8c8460cc1ea9186647 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 29 Feb 2016 19:07:33 +0200 Subject: mlx5: Add arbitrary sg list support Allocate proper context for arbitrary scatterlist registration If ib_alloc_mr is called with IB_MR_MAP_ARB_SG, the driver allocate a private klm list instead of a private page list. Set the UMR wqe correctly when posting the fast registration. Also, expose device cap IB_DEVICE_MAP_ARB_SG according to the device id (until we have a FW bit that correctly exposes it). Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 55fa588..7e89a54 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -491,6 +491,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B; props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + /* We support 'Gappy' memory registration too */ + props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 3c02b3c..60b8962 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -447,6 +447,7 @@ struct mlx5_ib_mr { int ndescs; int max_descs; int desc_size; + int access_mode; struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 628f4350..4d5bff1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1521,8 +1521,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; - int access_mode, err; - int ndescs = roundup(max_num_sg, 4); + int ndescs = ALIGN(max_num_sg, 4); + int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) @@ -1540,7 +1540,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); if (mr_type == IB_MR_TYPE_MEM_REG) { - access_mode = MLX5_ACCESS_MODE_MTT; + mr->access_mode = MLX5_ACCESS_MODE_MTT; in->seg.log2_page_size = PAGE_SHIFT; err = mlx5_alloc_priv_descs(pd->device, mr, @@ -1550,6 +1550,15 @@ struct ib_mr 
*mlx5_ib_alloc_mr(struct ib_pd *pd, mr->desc_size = sizeof(u64); mr->max_descs = ndescs; + } else if (mr_type == IB_MR_TYPE_SG_GAPS) { + mr->access_mode = MLX5_ACCESS_MODE_KLM; + + err = mlx5_alloc_priv_descs(pd->device, mr, + ndescs, sizeof(struct mlx5_klm)); + if (err) + goto err_free_in; + mr->desc_size = sizeof(struct mlx5_klm); + mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; @@ -1568,7 +1577,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_free_sig; - access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_ACCESS_MODE_KLM; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; @@ -1582,7 +1591,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; } - in->seg.flags = MLX5_PERM_UMR_EN | access_mode; + in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode; err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) @@ -1739,6 +1748,32 @@ done: return ret; } +static int +mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, + struct scatterlist *sgl, + unsigned short sg_nents) +{ + struct scatterlist *sg = sgl; + struct mlx5_klm *klms = mr->descs; + u32 lkey = mr->ibmr.pd->local_dma_lkey; + int i; + + mr->ibmr.iova = sg_dma_address(sg); + mr->ibmr.length = 0; + mr->ndescs = sg_nents; + + for_each_sg(sgl, sg, sg_nents, i) { + if (unlikely(i > mr->max_descs)) + break; + klms[i].va = cpu_to_be64(sg_dma_address(sg)); + klms[i].bcount = cpu_to_be32(sg_dma_len(sg)); + klms[i].key = cpu_to_be32(lkey); + mr->ibmr.length += sg_dma_len(sg); + } + + return i; +} + static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); @@ -1766,7 +1801,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, mr->desc_size * mr->max_descs, DMA_TO_DEVICE); - n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); + if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents); + else + n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, mr->desc_size * mr->max_descs, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 295eb2a..8dee8bc 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2629,6 +2629,11 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, int ndescs = mr->ndescs; memset(umr, 0, sizeof(*umr)); + + if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + /* KLMs take twice the size of MTTs */ + ndescs *= 2; + umr->flags = MLX5_UMR_CHECK_NOT_FREE; umr->klm_octowords = get_klm_octo(ndescs); umr->mkey_mask = frwr_mkey_mask(); @@ -2767,13 +2772,19 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, int ndescs = ALIGN(mr->ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT; + + if (mr->access_mode == MLX5_ACCESS_MODE_MTT) + seg->log2_page_size = ilog2(mr->ibmr.page_size); + else if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + /* KLMs take twice the size of MTTs */ + ndescs *= 2; + + seg->flags = get_umr_flags(access) | mr->access_mode; seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); seg->start_addr = cpu_to_be64(mr->ibmr.iova); seg->len = cpu_to_be64(mr->ibmr.length); seg->xlt_oct_size = cpu_to_be32(ndescs); - seg->log2_page_size = ilog2(mr->ibmr.page_size); } static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) -- cgit v0.10.2 From 
318d311e8f016dbbf22160d7b1c19a290a95ad9d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 29 Feb 2016 19:07:34 +0200 Subject: iser: Accept arbitrary sg lists mapping if the device supports it If the device support arbitrary sg list mapping (device cap IB_DEVICE_SG_GAPS_REG set) we allocate the memory regions with IB_MR_TYPE_SG_GAPS and allow the block layer to pass us gaps by skip setting the queue virt_boundary. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index c827c93..80b6bed 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -969,7 +969,16 @@ static umode_t iser_attr_is_visible(int param_type, int param) static int iscsi_iser_slave_alloc(struct scsi_device *sdev) { - blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); + struct iscsi_session *session; + struct iser_conn *iser_conn; + struct ib_device *ib_dev; + + session = starget_to_session(scsi_target(sdev))->dd_data; + iser_conn = session->leadconn->dd_data; + ib_dev = iser_conn->ib_conn.device->ib_device; + + if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); return 0; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 40c0f49..f21bdcc 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -252,14 +252,21 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn) } static int -iser_alloc_reg_res(struct ib_device *ib_device, +iser_alloc_reg_res(struct iser_device *device, struct ib_pd *pd, struct iser_reg_resources *res, unsigned int size) { + struct ib_device *ib_dev = device->ib_device; + enum ib_mr_type mr_type; int ret; - res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size); + if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + mr_type = IB_MR_TYPE_SG_GAPS; + else + mr_type = IB_MR_TYPE_MEM_REG; + + res->mr = ib_alloc_mr(pd, mr_type, size); if (IS_ERR(res->mr)) { ret = PTR_ERR(res->mr); iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); @@ -277,7 +284,7 @@ iser_free_reg_res(struct iser_reg_resources *rsc) } static int -iser_alloc_pi_ctx(struct ib_device *ib_device, +iser_alloc_pi_ctx(struct iser_device *device, struct ib_pd *pd, struct iser_fr_desc *desc, unsigned int size) @@ -291,7 +298,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device, pi_ctx = desc->pi_ctx; - ret = iser_alloc_reg_res(ib_device, pd, &pi_ctx->rsc, size); + ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size); if (ret) { iser_err("failed to allocate reg_resources\n"); goto alloc_reg_res_err; @@ -324,7 +331,7 @@ iser_free_pi_ctx(struct iser_pi_context *pi_ctx) } static struct iser_fr_desc * -iser_create_fastreg_desc(struct ib_device *ib_device, +iser_create_fastreg_desc(struct iser_device *device, struct ib_pd *pd, bool pi_enable, unsigned int size) @@ -336,12 +343,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, if (!desc) return ERR_PTR(-ENOMEM); - ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc, size); + ret = iser_alloc_reg_res(device, pd, &desc->rsc, size); if (ret) goto reg_res_alloc_failure; if (pi_enable) { - ret = iser_alloc_pi_ctx(ib_device, pd, desc, size); + ret = iser_alloc_pi_ctx(device, pd, desc, size); if (ret) goto pi_ctx_alloc_failure; } @@ -374,7 +381,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, spin_lock_init(&fr_pool->lock); fr_pool->size = 0; for (i = 0; i < 
cmds_max; i++) { - desc = iser_create_fastreg_desc(device->ib_device, device->pd, + desc = iser_create_fastreg_desc(device, device->pd, ib_conn->pi_support, size); if (IS_ERR(desc)) { ret = PTR_ERR(desc); -- cgit v0.10.2 From 153fefbf34e5079c2dd69490f5f23373758d2e9c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 7 Mar 2016 18:51:45 +0200 Subject: net/mlx5_core: Create anchor of last flow table Create an empty flow table in the end of NIC rx namesapce. Adding this flow table simplify the implementation of "forward to next prio" rules. Signed-off-by: Maor Gottlieb Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6f68dba..a2781ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -77,6 +77,9 @@ #define KERNEL_NUM_PRIOS 1 #define KENREL_MIN_LEVEL 2 +#define ANCHOR_MAX_FT 1 +#define ANCHOR_NUM_PRIOS 1 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) struct node_caps { size_t arr_sz; long *caps; @@ -92,7 +95,7 @@ static struct init_tree_node { int max_ft; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 3, + .ar_size = 4, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), @@ -108,6 +111,8 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))), } }; @@ -1126,6 +1131,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, case MLX5_FLOW_NAMESPACE_BYPASS: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: @@ -1351,6 +1357,25 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) } } +#define ANCHOR_PRIO 0 +#define ANCHOR_SIZE 1 +static int create_anchor_flow_table(struct mlx5_core_dev + *dev) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table *ft; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); + if (!ns) + return -EINVAL; + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE); + if (IS_ERR(ft)) { + mlx5_core_err(dev, "Failed to create last anchor flow table"); + return PTR_ERR(ft); + } + return 0; +} + static int init_root_ns(struct mlx5_core_dev *dev) { @@ -1363,6 +1388,9 @@ static int init_root_ns(struct mlx5_core_dev *dev) set_prio_attrs(dev->priv.root_ns); + if (create_anchor_flow_table(dev)) + goto cleanup; + return 0; cleanup: @@ -1392,6 +1420,15 @@ static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, root_ns = NULL; } +static void destroy_flow_tables(struct fs_prio *prio) +{ + struct mlx5_flow_table *iter; + struct mlx5_flow_table *tmp; + + fs_for_each_ft_safe(iter, tmp, prio) + mlx5_destroy_flow_table(iter); +} + static void cleanup_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; @@ -1420,6 +1457,7 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev) list); fs_get_obj(obj_iter_prio2, iter_prio2); + destroy_flow_tables(obj_iter_prio2); if (tree_remove_node(iter_prio2)) { mlx5_core_warn(dev, "Priority %d wasn't destroyed, refcount > 1\n", diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 00245fd..574a903 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -142,6 +142,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_list_for_each_entry(pos, root) \ list_for_each_entry(pos, root, node.list) +#define fs_list_for_each_entry_safe(pos, tmp, root) \ + list_for_each_entry_safe(pos, tmp, root, node.list) + #define fs_for_each_ns_or_ft_reverse(pos, prio) \ list_for_each_entry_reverse(pos, &(prio)->node.children, list) @@ -157,6 +160,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_ft(pos, prio) \ fs_list_for_each_entry(pos, &(prio)->node.children) +#define fs_for_each_ft_safe(pos, tmp, prio) \ + fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children) + #define fs_for_each_fg(pos, ft) \ fs_list_for_each_entry(pos, &(ft)->node.children) diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8230caa..72adf53 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -52,6 +52,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, + MLX5_FLOW_NAMESPACE_ANCHOR, MLX5_FLOW_NAMESPACE_FDB, }; -- cgit v0.10.2 From b3638e1a76648dbd482cc5a8f27eb6948cc3bc86 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 7 Mar 2016 18:51:46 +0200 Subject: net/mlx5_core: Introduce forward to next priority action Add support to create flow rule that forward packets to the first flow table in the next priority (next priority could be the first priority in the next namespace or the next priority in the same namespace). This feature could be used for DONT_TRAP rules or rules that only want to mark the packet with flow tag. In order to do it optimally, each flow table has list of all rules that point to this flow table, when a flow table is destroyed/created, we update the list head correspondingly. This kind of rule is created when destination is NULL and action is MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO. 
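As a usage illustration, here is a hedged sketch built only from the mlx5_add_flow_rule() signature shown in this series (not code taken from it): a consumer requests a forward-to-next-priority rule by passing a NULL destination together with MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; the helper name and the caller-built match buffers are placeholders.

#include <linux/mlx5/fs.h>

/* match_c/match_v are caller-built match criteria/value buffers */
static struct mlx5_flow_rule *
example_fwd_next_prio_rule(struct mlx5_flow_table *ft, u8 match_criteria_enable,
			   u32 *match_c, u32 *match_v)
{
	return mlx5_add_flow_rule(ft, match_criteria_enable,
				  match_c, match_v,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO,
				  MLX5_FS_DEFAULT_FLOW_TAG,
				  NULL);	/* no explicit destination */
}

The rule's actual destination is then managed by the flow steering core: it points at the first flow table of the next priority and is re-targeted as tables are created and destroyed, as described above.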
Signed-off-by: Maor Gottlieb Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a2781ee..bf34467 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -201,8 +201,10 @@ static void tree_put_node(struct fs_node *node) static int tree_remove_node(struct fs_node *node) { - if (atomic_read(&node->refcount) > 1) - return -EPERM; + if (atomic_read(&node->refcount) > 1) { + atomic_dec(&node->refcount); + return -EEXIST; + } tree_put_node(node); return 0; } @@ -365,6 +367,11 @@ static void del_rule(struct fs_node *node) memcpy(match_value, fte->val, sizeof(fte->val)); fs_get_obj(ft, fg->node.parent); list_del(&rule->node.list); + if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + mutex_lock(&rule->dest_attr.ft->lock); + list_del(&rule->next_ft); + mutex_unlock(&rule->dest_attr.ft->lock); + } fte->dests_size--; if (fte->dests_size) { err = mlx5_cmd_update_fte(dev, ft, @@ -470,6 +477,8 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, ft->node.type = FS_TYPE_FLOW_TABLE; ft->type = table_type; ft->max_fte = max_fte; + INIT_LIST_HEAD(&ft->fwd_rules); + mutex_init(&ft->lock); return ft; } @@ -606,9 +615,63 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } +static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err = 0; + + fs_get_obj(fte, rule->node.parent); + if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return -EINVAL; + lock_ref_node(&fte->node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + err = mlx5_cmd_update_fte(get_dev(&ft->node), + ft, fg->id, fte); + unlock_ref_node(&fte->node); + + return err; +} + +/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ +static int connect_fwd_rules(struct mlx5_core_dev *dev, + struct mlx5_flow_table *new_next_ft, + struct mlx5_flow_table *old_next_ft) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *iter; + int err = 0; + + /* new_next_ft and old_next_ft could be NULL only + * when we create/destroy the anchor flow table. 
+ */ + if (!new_next_ft || !old_next_ft) + return 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = new_next_ft; + + mutex_lock(&old_next_ft->lock); + list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); + mutex_unlock(&old_next_ft->lock); + list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { + err = mlx5_modify_rule_destination(iter, &dest); + if (err) + pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", + new_next_ft->id); + } + return 0; +} + static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { + struct mlx5_flow_table *next_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ @@ -617,6 +680,11 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table err = connect_prev_fts(dev, ft, prio); if (err) return err; + + next_ft = find_next_chained_ft(prio); + err = connect_fwd_rules(dev, ft, next_ft); + if (err) + return err; } if (MLX5_CAP_FLOWTABLE(dev, @@ -767,6 +835,7 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) if (!rule) return NULL; + INIT_LIST_HEAD(&rule->next_ft); rule->node.type = FS_TYPE_FLOW_DEST; memcpy(&rule->dest_attr, dest, sizeof(*dest)); @@ -787,9 +856,14 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, return ERR_PTR(-ENOMEM); fs_get_obj(ft, fg->node.parent); - /* Add dest to dests list- added as first element after the head */ + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ tree_init_node(&rule->node, 1, del_rule); - list_add_tail(&rule->node.list, &fte->node.children); + if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); fte->dests_size++; if (fte->dests_size == 1) err = mlx5_cmd_create_fte(get_dev(&ft->node), @@ -908,6 +982,25 @@ out: return fg; } +static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + list_for_each_entry(rule, &fte->node.children, node.list) { + if (rule->dest_attr.type == dest->type) { + if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dest->vport_num == rule->dest_attr.vport_num) || + (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + dest->ft == rule->dest_attr.ft) || + (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR && + dest->tir_num == rule->dest_attr.tir_num)) + return rule; + } + } + return NULL; +} + static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, u8 action, @@ -924,6 +1017,13 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && action == fte->action && flow_tag == fte->flow_tag) { + rule = find_flow_rule(fte, dest); + if (rule) { + atomic_inc(&rule->node.refcount); + unlock_ref_node(&fte->node); + unlock_ref_node(&fg->node); + return rule; + } rule = add_rule_fte(fte, fg, dest); unlock_ref_node(&fte->node); if (IS_ERR(rule)) @@ -989,14 +1089,14 @@ static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, return rule; } -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) 
+static struct mlx5_flow_rule * +_mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) { struct mlx5_flow_group *g; struct mlx5_flow_rule *rule; @@ -1019,6 +1119,63 @@ unlock: unlock_ref_node(&ft->node); return rule; } + +static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) +{ + return ((ft->type == FS_FT_NIC_RX) && + (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); +} + +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_destination gen_dest; + struct mlx5_flow_table *next_ft = NULL; + struct mlx5_flow_rule *rule = NULL; + u32 sw_action = action; + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!fwd_next_prio_supported(ft)) + return ERR_PTR(-EOPNOTSUPP); + if (dest) + return ERR_PTR(-EINVAL); + mutex_lock(&root->chain_lock); + next_ft = find_next_chained_ft(prio); + if (next_ft) { + gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + gen_dest.ft = next_ft; + dest = &gen_dest; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else { + mutex_unlock(&root->chain_lock); + return ERR_PTR(-EOPNOTSUPP); + } + } + + rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); + + if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!IS_ERR_OR_NULL(rule) && + (list_empty(&rule->next_ft))) { + mutex_lock(&next_ft->lock); + list_add(&rule->next_ft, &next_ft->fwd_rules); + mutex_unlock(&next_ft->lock); + rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + } + mutex_unlock(&root->chain_lock); + } + return rule; +} EXPORT_SYMBOL(mlx5_add_flow_rule); void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) @@ -1082,6 +1239,10 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) return 0; next_ft = find_next_chained_ft(prio); + err = connect_fwd_rules(dev, next_ft, ft); + if (err) + return err; + err = connect_prev_fts(dev, next_ft, prio); if (err) mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 574a903..f37a624 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -68,6 +68,11 @@ struct fs_node { struct mlx5_flow_rule { struct fs_node node; struct mlx5_flow_destination dest_attr; + /* next_ft should be accessed under chain_lock and only of + * destination type is FWD_NEXT_fT. 
+ */ + struct list_head next_ft; + u32 sw_action; }; /* Type of children is mlx5_flow_group */ @@ -82,6 +87,10 @@ struct mlx5_flow_table { unsigned int required_groups; unsigned int num_groups; } autogroup; + /* Protect fwd_rules */ + struct mutex lock; + /* FWD rules that point on this flow table */ + struct list_head fwd_rules; }; /* Type of children is mlx5_flow_rule */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 72adf53..8dec550 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,6 +38,10 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +enum { + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, +}; + #define LEFTOVERS_RULE_NUM 2 static inline void build_leftovers_ft_param(int *priority, int *n_ent, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 51f1e54..5f70f36 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -458,7 +458,8 @@ struct mlx5_ifc_ads_bits { }; struct mlx5_ifc_flow_table_nic_cap_bits { - u8 reserved_at_0[0x200]; + u8 nic_rx_multi_path_tirs[0x1]; + u8 reserved_at_1[0x1ff]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; -- cgit v0.10.2 From 35d1901134e97cf95c0ab6ef70f5aead6cb34e9e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 7 Mar 2016 18:51:47 +0200 Subject: IB/mlx5: Add support for don't trap rules Each bypass flow steering priority will be split into two priorities: 1. Priority for don't trap rules. 2. Priority for normal rules. When user creates a flow using IB_FLOW_ATTR_FLAGS_DONT_TRAP flag, the driver creates two flow rules, one used for receiving the traffic and the other one for forwarding the packet to continue matching in lower or equal priorities. Signed-off-by: Maor Gottlieb Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 03c418c..5863644 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1369,11 +1369,20 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) return 0; } +static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) +{ + priority *= 2; + if (!dont_trap) + priority++; + return priority; +} + #define MLX5_FS_MAX_TYPES 10 #define MLX5_FS_MAX_ENTRIES 32000UL static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, struct ib_flow_attr *flow_attr) { + bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_table *ft; @@ -1383,10 +1392,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int err = 0; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (flow_is_multicast_only(flow_attr)) + if (flow_is_multicast_only(flow_attr) && + !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else - priority = flow_attr->priority; + priority = ib_prio_to_core_prio(flow_attr->priority, + dont_trap); ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); num_entries = MLX5_FS_MAX_ENTRIES; @@ -1434,6 +1445,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, unsigned int spec_index; u32 *match_c; u32 *match_v; + u32 action; int err = 0; if (!is_valid_attr(flow_attr)) @@ -1459,9 +1471,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, /* Outer header support only */ match_criteria_enable = (!outer_header_zero(match_c)) << 0; + action = dst ? 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_c, match_v, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + action, MLX5_FS_DEFAULT_FLOW_TAG, dst); @@ -1481,6 +1495,29 @@ free: return err ? ERR_PTR(err) : handler; } +static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_dst = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); + if (!IS_ERR(handler)) { + handler_dst = create_flow_rule(dev, ft_prio, + flow_attr, dst); + if (IS_ERR(handler_dst)) { + mlx5_del_flow_rule(handler->rule); + kfree(handler); + handler = handler_dst; + } else { + list_add(&handler_dst->list, &handler->list); + } + } + + return handler; +} enum { LEFTOVERS_MC, LEFTOVERS_UC, @@ -1558,7 +1595,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, if (domain != IB_FLOW_DOMAIN_USER || flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || - flow_attr->flags) + (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) return ERR_PTR(-EINVAL); dst = kzalloc(sizeof(*dst), GFP_KERNEL); @@ -1577,8 +1614,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - handler = create_flow_rule(dev, ft_prio, flow_attr, - dst); + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { + handler = create_dont_trap_rule(dev, ft_prio, + flow_attr, dst); + } else { + handler = create_flow_rule(dev, ft_prio, flow_attr, + dst); + } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d2b9737..bd84b1f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -126,7 +126,7 @@ struct mlx5_ib_pd { }; #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) -#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) +#define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1) #if (MLX5_IB_FLOW_LAST_PRIO <= 0) #error "Invalid number of bypass priorities" #endif diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 987764a..2bf222e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1294,6 +1294,11 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -#define MLX5_BY_PASS_NUM_PRIOS 9 +#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 +#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 +#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 +#define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ + MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ + MLX5_BY_PASS_NUM_MULTICAST_PRIOS) #endif /* MLX5_DEVICE_H */ -- cgit v0.10.2 From a699c6c27fbce4942bc902f42b69e03c5ce03fa9 Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Mon, 11 Jan 2016 18:30:56 -0500 Subject: staging/hfi1: add per SDMA engine stats to hfistats Added the following per sdma engine stats: - SendDmaDescFetchedCnt - software maintained count of SDMA interrupts (SDmaInt, SDmaIdleInt, SDmaProgressInt) - software maintained counts of SDMA error cases Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Vennila 
Megavannan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 503bfca..f4f720d 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -1297,10 +1297,58 @@ static u64 dev_access_u32_csr(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; + u64 csr = entry->csr; - if (vl != CNTR_INVALID_VL) - return 0; - return read_write_csr(dd, entry->csr, mode, data); + if (entry->flags & CNTR_SDMA) { + if (vl == CNTR_INVALID_VL) + return 0; + csr += 0x100 * vl; + } else { + if (vl != CNTR_INVALID_VL) + return 0; + } + return read_write_csr(dd, csr, mode, data); +} + +static u64 access_sde_err_cnt(const struct cntr_entry *entry, + void *context, int idx, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + if (dd->per_sdma && idx < dd->num_sdma) + return dd->per_sdma[idx].err_cnt; + return 0; +} + +static u64 access_sde_int_cnt(const struct cntr_entry *entry, + void *context, int idx, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + if (dd->per_sdma && idx < dd->num_sdma) + return dd->per_sdma[idx].sdma_int_cnt; + return 0; +} + +static u64 access_sde_idle_int_cnt(const struct cntr_entry *entry, + void *context, int idx, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + if (dd->per_sdma && idx < dd->num_sdma) + return dd->per_sdma[idx].idle_int_cnt; + return 0; +} + +static u64 access_sde_progress_int_cnt(const struct cntr_entry *entry, + void *context, int idx, int mode, + u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + if (dd->per_sdma && idx < dd->num_sdma) + return dd->per_sdma[idx].progress_int_cnt; + return 0; } static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context, @@ -4070,6 +4118,22 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_kmem_wait), [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL, access_sw_send_schedule), +[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn", + SEND_DMA_DESC_FETCHED_CNT, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + dev_access_u32_csr), +[C_SDMA_INT_CNT] = CNTR_ELEM("SDMAInt", 0, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + access_sde_int_cnt), +[C_SDMA_ERR_CNT] = CNTR_ELEM("SDMAErrCt", 0, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + access_sde_err_cnt), +[C_SDMA_IDLE_INT_CNT] = CNTR_ELEM("SDMAIdInt", 0, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + access_sde_idle_int_cnt), +[C_SDMA_PROGRESS_INT_CNT] = CNTR_ELEM("SDMAPrIntCn", 0, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + access_sde_progress_int_cnt), /* MISC_ERR_STATUS */ [C_MISC_PLL_LOCK_FAIL_ERR] = CNTR_ELEM("MISC_PLL_LOCK_FAIL_ERR", 0, 0, CNTR_NORMAL, @@ -5707,6 +5771,7 @@ static void handle_sdma_eng_err(struct hfi1_devdata *dd, dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n", sde->this_idx, source, (unsigned long long)status); #endif + sde->err_cnt++; sdma_engine_error(sde, status); /* @@ -11150,6 +11215,20 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep, dd->cntrs[entry->offset + j] = val; } + } else if (entry->flags & CNTR_SDMA) { + hfi1_cdbg(CNTR, + "\t Per SDMA Engine\n"); + for (j = 0; j < dd->chip_sdma_engines; + j++) { + val = + entry->rw_cntr(entry, dd, j, + CNTR_MODE_R, 0); + hfi1_cdbg(CNTR, + "\t\tRead 0x%llx for %d\n", + val, j); + dd->cntrs[entry->offset + j] = + val; + } } else { val = 
entry->rw_cntr(entry, dd, CNTR_INVALID_VL, @@ -11553,6 +11632,21 @@ static int init_cntrs(struct hfi1_devdata *dd) dd->ndevcntrs++; index++; } + } else if (dev_cntrs[i].flags & CNTR_SDMA) { + hfi1_dbg_early( + "\tProcessing per SDE counters chip enginers %u\n", + dd->chip_sdma_engines); + dev_cntrs[i].offset = index; + for (j = 0; j < dd->chip_sdma_engines; j++) { + memset(name, '\0', C_MAX_NAME); + snprintf(name, C_MAX_NAME, "%s%d", + dev_cntrs[i].name, j); + sz += strlen(name); + sz++; + hfi1_dbg_early("\t\t%s\n", name); + dd->ndevcntrs++; + index++; + } } else { /* +1 for newline */ sz += strlen(dev_cntrs[i].name) + 1; @@ -11594,6 +11688,16 @@ static int init_cntrs(struct hfi1_devdata *dd) p += strlen(name); *p++ = '\n'; } + } else if (dev_cntrs[i].flags & CNTR_SDMA) { + for (j = 0; j < TXE_NUM_SDMA_ENGINES; + j++) { + memset(name, '\0', C_MAX_NAME); + snprintf(name, C_MAX_NAME, "%s%d", + dev_cntrs[i].name, j); + memcpy(p, name, strlen(name)); + p += strlen(name); + *p++ = '\n'; + } } else { memcpy(p, dev_cntrs[i].name, strlen(dev_cntrs[i].name)); diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 1368a44..b46ef66 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -787,6 +787,11 @@ enum { C_SW_PIO_WAIT, C_SW_KMEM_WAIT, C_SW_SEND_SCHED, + C_SDMA_DESC_FETCHED_CNT, + C_SDMA_INT_CNT, + C_SDMA_ERR_CNT, + C_SDMA_IDLE_INT_CNT, + C_SDMA_PROGRESS_INT_CNT, /* MISC_ERR_STATUS */ C_MISC_PLL_LOCK_FAIL_ERR, C_MISC_MBIST_FAIL_ERR, diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h index 014d7a6..3cd3352 100644 --- a/drivers/staging/rdma/hfi1/chip_registers.h +++ b/drivers/staging/rdma/hfi1/chip_registers.h @@ -1301,5 +1301,6 @@ #define CCE_INT_BLOCKED (CCE + 0x000000110C00) #define SEND_DMA_IDLE_CNT (TXE + 0x000000200040) #define SEND_DMA_DESC_FETCHED_CNT (TXE + 0x000000200058) +#define CCE_MSIX_PBA_OFFSET 0X0110000 #endif /* DEF_CHIP_REG */ diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index b33bcca..6bfa5c8 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -490,6 +490,7 @@ struct hfi1_sge_state; #define CNTR_DISABLED 0x2 /* Disable this counter */ #define CNTR_32BIT 0x4 /* Simulate 64 bits for this counter */ #define CNTR_VL 0x8 /* Per VL counter */ +#define CNTR_SDMA 0x10 #define CNTR_INVALID_VL -1 /* Specifies invalid VL */ #define CNTR_MODE_W 0x0 #define CNTR_MODE_R 0x1 diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 1d38be5..4eb55fa 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -1061,18 +1061,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->desc_avail = sdma_descq_freecnt(sde); sde->sdma_shift = ilog2(descq_cnt); sde->sdma_mask = (1 << sde->sdma_shift) - 1; - sde->descq_full_count = 0; - - /* Create a mask for all 3 chip interrupt sources */ - sde->imask = (u64)1 << (0*TXE_NUM_SDMA_ENGINES + this_idx) - | (u64)1 << (1*TXE_NUM_SDMA_ENGINES + this_idx) - | (u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx); - /* Create a mask specifically for sdma_idle */ - sde->idle_mask = - (u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx); - /* Create a mask specifically for sdma_progress */ - sde->progress_mask = - (u64)1 << (TXE_NUM_SDMA_ENGINES + this_idx); + + /* Create a mask specifically for each interrupt source */ + sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES + + this_idx); + sde->progress_mask = (u64)1 << (1 * 
TXE_NUM_SDMA_ENGINES + + this_idx); + sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES + + this_idx); + /* Create a combined mask to cover all 3 interrupt sources */ + sde->imask = sde->int_mask | sde->progress_mask | + sde->idle_mask; + spin_lock_init(&sde->tail_lock); seqlock_init(&sde->head_lock); spin_lock_init(&sde->senddmactrl_lock); @@ -1552,6 +1552,12 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status) trace_hfi1_sdma_engine_interrupt(sde, status); write_seqlock(&sde->head_lock); sdma_set_desc_cnt(sde, sdma_desct_intr); + if (status & sde->idle_mask) + sde->idle_int_cnt++; + else if (status & sde->progress_mask) + sde->progress_int_cnt++; + else if (status & sde->int_mask) + sde->sdma_int_cnt++; sdma_make_progress(sde, status); write_sequnlock(&sde->head_lock); } diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index da89e64..757017a 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -409,6 +409,7 @@ struct sdma_engine { u64 imask; /* clear interrupt mask */ u64 idle_mask; u64 progress_mask; + u64 int_mask; /* private: */ volatile __le64 *head_dma; /* DMA'ed by chip */ /* private: */ @@ -465,6 +466,12 @@ struct sdma_engine { u16 tx_head; /* private: */ u64 last_status; + /* private */ + u64 err_cnt; + /* private */ + u64 sdma_int_cnt; + u64 idle_int_cnt; + u64 progress_int_cnt; /* private: */ struct list_head dmawait; -- cgit v0.10.2 From c024c554aeaf6197a1869fdc79c190139182203a Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 11 Jan 2016 18:30:57 -0500 Subject: staging/hfi1: Remove unneeded variable index The variable "index" increments the same as dd->ndevcntrs. Just use the latter. Remove unneeded usage of "index" in the fill loop - it is not used there or later in the function.
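The cleanup works because the offset assigned to each counter group is simply the number of device counters handed out so far, so the running dd->ndevcntrs total can serve as the offset directly and the extra index variable adds nothing. A minimal standalone C sketch of that pattern (illustrative names only, not the hfi1 code itself):

#include <stdio.h>

#define NUM_GROUPS 3

int main(void)
{
	/* entries per counter group; values are made up for illustration */
	int group_entries[NUM_GROUPS] = { 1, 8, 16 };
	int offset[NUM_GROUPS];
	int ndevcntrs = 0;	/* plays the role of dd->ndevcntrs */
	int i, j;

	for (i = 0; i < NUM_GROUPS; i++) {
		/* the running count itself is the group's starting offset */
		offset[i] = ndevcntrs;
		for (j = 0; j < group_entries[i]; j++)
			ndevcntrs++;
	}

	for (i = 0; i < NUM_GROUPS; i++)
		printf("group %d starts at offset %d\n", i, offset[i]);
	return 0;
}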
Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index f4f720d..1109049 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -11592,7 +11592,7 @@ mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); #define C_MAX_NAME 13 /* 12 chars + one for /0 */ static int init_cntrs(struct hfi1_devdata *dd) { - int i, rcv_ctxts, index, j; + int i, rcv_ctxts, j; size_t sz; char *p; char name[C_MAX_NAME]; @@ -11609,7 +11609,6 @@ static int init_cntrs(struct hfi1_devdata *dd) /* size names and determine how many we have*/ dd->ndevcntrs = 0; sz = 0; - index = 0; for (i = 0; i < DEV_CNTR_LAST; i++) { hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name); @@ -11620,7 +11619,7 @@ static int init_cntrs(struct hfi1_devdata *dd) if (dev_cntrs[i].flags & CNTR_VL) { hfi1_dbg_early("\tProcessing VL cntr\n"); - dev_cntrs[i].offset = index; + dev_cntrs[i].offset = dd->ndevcntrs; for (j = 0; j < C_VL_COUNT; j++) { memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", @@ -11630,13 +11629,12 @@ static int init_cntrs(struct hfi1_devdata *dd) sz++; hfi1_dbg_early("\t\t%s\n", name); dd->ndevcntrs++; - index++; } } else if (dev_cntrs[i].flags & CNTR_SDMA) { hfi1_dbg_early( "\tProcessing per SDE counters chip enginers %u\n", dd->chip_sdma_engines); - dev_cntrs[i].offset = index; + dev_cntrs[i].offset = dd->ndevcntrs; for (j = 0; j < dd->chip_sdma_engines; j++) { memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", @@ -11645,24 +11643,22 @@ static int init_cntrs(struct hfi1_devdata *dd) sz++; hfi1_dbg_early("\t\t%s\n", name); dd->ndevcntrs++; - index++; } } else { /* +1 for newline */ sz += strlen(dev_cntrs[i].name) + 1; + dev_cntrs[i].offset = dd->ndevcntrs; dd->ndevcntrs++; - dev_cntrs[i].offset = index; - index++; hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name); } } /* allocate space for the counter values */ - dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL); + dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL); if (!dd->cntrs) goto bail; - dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL); + dd->scntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL); if (!dd->scntrs) goto bail; @@ -11674,7 +11670,7 @@ static int init_cntrs(struct hfi1_devdata *dd) goto bail; /* fill in the names */ - for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) { + for (p = dd->cntrnames, i = 0; i < DEV_CNTR_LAST; i++) { if (dev_cntrs[i].flags & CNTR_DISABLED) { /* Nothing */ } else { @@ -11704,7 +11700,6 @@ static int init_cntrs(struct hfi1_devdata *dd) p += strlen(dev_cntrs[i].name); *p++ = '\n'; } - index++; } } -- cgit v0.10.2 From 624be1dbdb7c69c0218e78a3afec98a09a08e747 Mon Sep 17 00:00:00 2001 From: Edward Mascarenhas Date: Mon, 11 Jan 2016 18:31:43 -0500 Subject: staging/hfi1: Clean up comments Clean up comments by deleting numbering and terms internal to Intel. The information on the actual bugs is not deleted. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Edward Mascarenhas Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 1109049..f7bf902 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13636,7 +13636,6 @@ int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey) write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg); /* * Enable send-side J_KEY integrity check, unless this is A0 h/w - * (due to A0 erratum). */ if (!is_ax(dd)) { reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 8485de1..32185206 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -368,7 +368,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (opcode == IB_OPCODE_CNP) { /* * Only in pre-B0 h/w is the CNP_OPCODE handled - * via this code path (errata 291394). + * via this code path. */ struct hfi1_qp *qp = NULL; u32 lqpn, rqpn; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 6bfa5c8..5bc385a 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1741,7 +1741,7 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; if (is_ax(dd)) - /* turn off send-side job key checks - A0 erratum */ + /* turn off send-side job key checks - A0 */ return base_sc_integrity & ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; return base_sc_integrity; @@ -1768,7 +1768,7 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) | SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK; if (is_ax(dd)) - /* turn off send-side job key checks - A0 erratum */ + /* turn off send-side job key checks - A0 */ return base_sdma_integrity & ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; return base_sdma_integrity; diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 9917faf..b2f553d8 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -1063,7 +1063,7 @@ retry: * PcieCfgRegPl100 - Gen3 Control * * turn off PcieCfgRegPl100.Gen3ZRxDcNonCompl - * turn on PcieCfgRegPl100.EqEieosCnt (erratum) + * turn on PcieCfgRegPl100.EqEieosCnt * Everything else zero. */ reg32 = PCIE_CFG_REG_PL100_EQ_EIEOS_CNT_SMASK; diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index bd1b402..25e6053 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -671,7 +671,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (unlikely(bth1 & HFI1_BECN_SMASK)) { /* * In pre-B0 h/w the CNP_OPCODE is handled via an - * error path (errata 291394). + * error path. */ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; -- cgit v0.10.2 From f4ddedf4263bb94c81b2647ec5cf5ee79c6c20b0 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 11 Jan 2016 18:31:44 -0500 Subject: staging/hfi1: Fix Xmit Wait calculation Total XMIT wait needs to sum the xmit wait values of all the VLs not just those requested in the query. Also, make the algorithm used for both PortStatus and PortDataCounters the same. 
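The two essentials of the fix are that the accumulation walks every VL (VL_MASK_ALL) rather than only the VLs named in the query, and that the 64-bit sum saturates at ~0 instead of silently wrapping. A small standalone sketch of the saturating sum (hypothetical helper, not the driver code):

#include <stdint.h>
#include <stdio.h>

#define NUM_VLS 8

/* Sum per-VL xmit wait counters, saturating at ~0 on 64-bit overflow. */
static uint64_t sum_xmit_wait(const uint64_t vl_wait[NUM_VLS])
{
	uint64_t sum = 0;
	int vl;

	for (vl = 0; vl < NUM_VLS; vl++) {
		uint64_t tmp = sum + vl_wait[vl];

		if (tmp < sum)		/* wrapped: report saturated value */
			return (uint64_t)~0;
		sum = tmp;
	}
	return sum;
}

int main(void)
{
	uint64_t wait[NUM_VLS] = { 10, 20, 0, 5, 0, 0, 0, 0 };

	printf("total xmit wait: %llu\n",
	       (unsigned long long)sum_xmit_wait(wait));
	return 0;
}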
Reviewed-by: Arthur Kepner Reviewed-by: Breyer, Scott J Signed-off-by: Ira Weiny Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index eeb8687..aa84781 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -2279,17 +2279,23 @@ static void a0_portstatus(struct hfi1_pportdata *ppd, { if (!is_bx(ppd->dd)) { unsigned long vl; - u64 max_vl_xmit_wait = 0, tmp; + u64 sum_vl_xmit_wait = 0; u32 vl_all_mask = VL_MASK_ALL; for_each_set_bit(vl, (unsigned long *)&(vl_all_mask), 8 * sizeof(vl_all_mask)) { - tmp = read_port_cntr(ppd, C_TX_WAIT_VL, - idx_from_vl(vl)); - if (tmp > max_vl_xmit_wait) - max_vl_xmit_wait = tmp; + u64 tmp = sum_vl_xmit_wait + + read_port_cntr(ppd, C_TX_WAIT_VL, + idx_from_vl(vl)); + if (tmp < sum_vl_xmit_wait) { + /* we wrapped */ + sum_vl_xmit_wait = (u64)~0; + break; + } + sum_vl_xmit_wait = tmp; } - rsp->port_xmit_wait = cpu_to_be64(max_vl_xmit_wait); + if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait) + rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait); } } @@ -2491,18 +2497,19 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port, return error_counter_summary; } -static void a0_datacounters(struct hfi1_devdata *dd, struct _port_dctrs *rsp, +static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp, u32 vl_select_mask) { - if (!is_bx(dd)) { + if (!is_bx(ppd->dd)) { unsigned long vl; - int vfi = 0; u64 sum_vl_xmit_wait = 0; + u32 vl_all_mask = VL_MASK_ALL; - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(vl_select_mask)) { + for_each_set_bit(vl, (unsigned long *)&(vl_all_mask), + 8 * sizeof(vl_all_mask)) { u64 tmp = sum_vl_xmit_wait + - be64_to_cpu(rsp->vls[vfi++].port_vl_xmit_wait); + read_port_cntr(ppd, C_TX_WAIT_VL, + idx_from_vl(vl)); if (tmp < sum_vl_xmit_wait) { /* we wrapped */ sum_vl_xmit_wait = (u64) ~0; @@ -2665,7 +2672,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, vfi++; } - a0_datacounters(dd, rsp, vl_select_mask); + a0_datacounters(ppd, rsp, vl_select_mask); if (resp_len) *resp_len += response_data_size; -- cgit v0.10.2 From fb9036dd8cd85533456aec43d7892b707561eba8 Mon Sep 17 00:00:00 2001 From: Jim Snow Date: Mon, 11 Jan 2016 18:32:21 -0500 Subject: staging/hfi1: check for ARMED->ACTIVE change in recv int The link state will transition from ARMED to ACTIVE when a non-SC15 packet arrives, but the driver might not notice the change. With this fix, if the slowpath receive interrupt handler sees a non-SC15 packet while in the ARMED state, we queue work to call linkstate_active_work from process context to promote it to ACTIVE. 
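The trigger condition reduces to reconstructing the 5-bit service channel from the LRH SC[3:0] field plus the RHF DC_INFO bit, and acting only when the packet is not SC15 while the link is still ARMED. A standalone sketch of that check (field layout taken from the patch's hdr2sc() helper; the actual queue_work() call is only indicated by a comment):

#include <stdint.h>
#include <stdio.h>

/* Rebuild the 5-bit SC from LRH bits 15:12 and the RHF DC_INFO bit,
 * mirroring the hdr2sc() helper added by this patch. */
static int hdr2sc(uint16_t lrh0, int dc_info)
{
	return ((lrh0 >> 12) & 0xf) | ((!!dc_info) << 4);
}

/* Return 1 when a received packet should promote an ARMED link to ACTIVE. */
static int should_activate(uint16_t lrh0, int dc_info, int link_armed)
{
	if (!link_armed)
		return 0;
	if (hdr2sc(lrh0, dc_info) == 0xf)
		return 0;	/* SC15 (management traffic) is ignored */
	/* real driver: queue_work(ppd->hfi1_wq, &ppd->linkstate_active_work); */
	return 1;
}

int main(void)
{
	printf("SC0 while ARMED  -> %d\n", should_activate(0x0000, 0, 1));
	printf("SC15 while ARMED -> %d\n", should_activate(0xf000, 0, 1));
	return 0;
}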
Reviewed-by: Dean Luick Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jim Snow Signed-off-by: Brendan Cunningham Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index f7bf902..63d5d71 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -7878,7 +7878,7 @@ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd) } /* force the receive interrupt */ -static inline void force_recv_intr(struct hfi1_ctxtdata *rcd) +void force_recv_intr(struct hfi1_ctxtdata *rcd) { write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask); } @@ -7977,7 +7977,7 @@ u32 read_physical_state(struct hfi1_devdata *dd) & DC_DC8051_STS_CUR_STATE_PORT_MASK; } -static u32 read_logical_state(struct hfi1_devdata *dd) +u32 read_logical_state(struct hfi1_devdata *dd) { u64 reg; @@ -9952,6 +9952,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ppd->link_enabled = 1; } + set_all_slowpath(ppd->dd); ret = set_local_link_attributes(ppd); if (ret) break; diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index b46ef66..78ba425 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -690,6 +690,8 @@ u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl); u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data); u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl); u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data); +u32 read_logical_state(struct hfi1_devdata *dd); +void force_recv_intr(struct hfi1_ctxtdata *rcd); /* Per VL indexes */ enum { diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 32185206..d096f11 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -862,6 +862,37 @@ static inline void set_all_dma_rtail(struct hfi1_devdata *dd) &handle_receive_interrupt_dma_rtail; } +void set_all_slowpath(struct hfi1_devdata *dd) +{ + int i; + + /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) + dd->rcd[i]->do_interrupt = &handle_receive_interrupt; +} + +static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, + struct hfi1_packet packet, + struct hfi1_devdata *dd) +{ + struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; + struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd, + packet.rhf_addr); + + if (hdr2sc(hdr, packet.rhf) != 0xf) { + int hwstate = read_logical_state(dd); + + if (hwstate != LSTATE_ACTIVE) { + dd_dev_info(dd, "Unexpected link state %d\n", hwstate); + return 0; + } + + queue_work(rcd->ppd->hfi1_wq, lsaw); + return 1; + } + return 0; +} + /* * handle_receive_interrupt - receive a packet * @rcd: the context @@ -929,6 +960,11 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) last = skip_rcv_packet(&packet, thread); skip_pkt = 0; } else { + /* Auto activate link on non-SC15 packet receive */ + if (unlikely(rcd->ppd->host_link_state == + HLS_UP_ARMED) && + set_armed_to_active(rcd, packet, dd)) + goto bail; last = process_rcv_packet(&packet, thread); } @@ -984,6 +1020,42 @@ bail: } /* + * We may discover in the interrupt that the hardware link state has + * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet), + * and we need to update the driver's notion of the link state. 
We cannot + * run set_link_state from interrupt context, so we queue this function on + * a workqueue. + * + * We delay the regular interrupt processing until after the state changes + * so that the link will be in the correct state by the time any application + * we wake up attempts to send a reply to any message it received. + * (Subsequent receive interrupts may possibly force the wakeup before we + * update the link state.) + * + * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes + * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues, + * so we're safe from use-after-free of the rcd. + */ +void receive_interrupt_work(struct work_struct *work) +{ + struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, + linkstate_active_work); + struct hfi1_devdata *dd = ppd->dd; + int i; + + /* Received non-SC15 packet implies neighbor_normal */ + ppd->neighbor_normal = 1; + set_link_state(ppd, HLS_UP_ACTIVE); + + /* + * Interrupt all kernel contexts that could have had an + * interrupt during auto activation. + */ + for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++) + force_recv_intr(dd->rcd[i]); +} + +/* * Convert a given MTU size to the on-wire MAD packet enumeration. * Return -1 if the size is invalid. */ diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 5bc385a..23d7e02 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -729,6 +729,7 @@ struct hfi1_pportdata { u8 remote_link_down_reason; /* Error events that will cause a port bounce. */ u32 port_error_action; + struct work_struct linkstate_active_work; }; typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); @@ -1177,6 +1178,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *); int handle_receive_interrupt(struct hfi1_ctxtdata *, int); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); +void set_all_slowpath(struct hfi1_devdata *dd); /* receive packet handler dispositions */ #define RCV_PKT_OK 0x0 /* keep going */ @@ -1197,6 +1199,15 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) return ppd->lstate; /* use the cached value */ } +void receive_interrupt_work(struct work_struct *work); + +/* extract service channel from header and rhf */ +static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) +{ + return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | + ((!!(rhf & RHF_DC_INFO_MASK)) << 4); +} + static inline u16 generate_jkey(kuid_t uid) { return from_kuid(current_user_ns(), uid) & 0xffff; diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 48269a2..27b31fc 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -498,6 +498,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); INIT_WORK(&ppd->sma_message_work, handle_sma_message); INIT_WORK(&ppd->link_bounce_work, handle_link_bounce); + INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work); mutex_init(&ppd->hls_lock); spin_lock_init(&ppd->sdma_alllock); spin_lock_init(&ppd->qsfp_info.qsfp_lock); -- cgit v0.10.2 From 0194621b225348428c212f330c26d194fc77bd15 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:50:24 -0800 Subject: IB/rdmavt: Create module framework and handle driver registration This patch introduces the basics for a new 
module called rdma_vt. This new driver is a software implementation of the InfiniBand verbs and aims to replace the multiple implementations that exist and duplicate each others' code. While the call to actually register the device with the IB core happens in rdma_vt, most of the work is still done in the drivers themselves. This will be changing in a follow on patch this is just laying the groundwork for this infrastructure. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/MAINTAINERS b/MAINTAINERS index da3e4d8..1eb4f8e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9075,6 +9075,12 @@ L: rds-devel@oss.oracle.com (moderated for non-subscribers) S: Supported F: net/rds/ +RDMAVT - RDMA verbs software +M: Dennis Dalessandro +L: linux-rdma@vger.kernel.org +S: Supported +F: drivers/infiniband/sw/rdmavt + READ-COPY UPDATE (RCU) M: "Paul E. McKenney" M: Josh Triplett diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 8a8440c..d00d86d 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -82,4 +82,6 @@ source "drivers/infiniband/ulp/srpt/Kconfig" source "drivers/infiniband/ulp/iser/Kconfig" source "drivers/infiniband/ulp/isert/Kconfig" +source "drivers/infiniband/sw/rdmavt/Kconfig" + endif # INFINIBAND diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index dc21836..fad0b44 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_INFINIBAND) += core/ obj-$(CONFIG_INFINIBAND) += hw/ obj-$(CONFIG_INFINIBAND) += ulp/ +obj-$(CONFIG_INFINIBAND) += sw/ diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile new file mode 100644 index 0000000..988b6a0 --- /dev/null +++ b/drivers/infiniband/sw/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/ diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig new file mode 100644 index 0000000..11aa6a3 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -0,0 +1,6 @@ +config INFINIBAND_RDMAVT + tristate "RDMA verbs transport library" + depends on 64BIT + default m + ---help--- + This is a common software verbs provider for RDMA networks. diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile new file mode 100644 index 0000000..98a664d --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -0,0 +1,10 @@ +# +# rdmavt driver +# +# +# +# Called from the kernel module build system. +# +obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o + +rdmavt-y := vt.o diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c new file mode 100644 index 0000000..aa325db --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -0,0 +1,83 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include "vt.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("RDMA Verbs Transport Library"); + +static int rvt_init(void) +{ + /* Do any work needed prior to drivers calling for registration*/ + return 0; +} +module_init(rvt_init); + +static void rvt_cleanup(void) +{ +} +module_exit(rvt_cleanup); + +int rvt_register_device(struct rvt_dev_info *rdi) +{ + if (!rdi) + return -EINVAL; + + return ib_register_device(&rdi->ibdev, rdi->port_callback); +} +EXPORT_SYMBOL(rvt_register_device); + +void rvt_unregister_device(struct rvt_dev_info *rdi) +{ + if (!rdi) + return; + + ib_unregister_device(&rdi->ibdev); +} +EXPORT_SYMBOL(rvt_unregister_device); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h new file mode 100644 index 0000000..0a39448 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -0,0 +1,53 @@ +#ifndef DEF_RDMAVT_H +#define DEF_RDMAVT_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +#endif /* DEF_RDMAVT_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h new file mode 100644 index 0000000..0438bf2 --- /dev/null +++ b/include/rdma/rdma_vt.h @@ -0,0 +1,70 @@ +#ifndef DEF_RDMA_VT_H +#define DEF_RDMA_VT_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Structure that low level drivers will populate in order to register with the + * rdmavt layer. + */ + +#include "ib_verbs.h" +struct rvt_dev_info { + struct ib_device ibdev; + int (*port_callback)(struct ib_device *, u8, struct kobject *); + + /* + * TODO: + * need to reflect module parameters that may vary by dev + */ +}; + +int rvt_register_device(struct rvt_dev_info *rvd); +void rvt_unregister_device(struct rvt_dev_info *rvd); + +#endif /* DEF_RDMA_VT_H */ -- cgit v0.10.2 From c1b332bc16a6e9d42b4ab0b5282e3937f776bf71 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:51:18 -0800 Subject: IB/rdmavt: Consolidate dma ops in rdmavt. This patch adds dma functions to rdmavt. The source is hfi1's version of dma.c which will be removed by a subsequent hfi1 patch. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 98a664d..134d2d0 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,4 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o +rdmavt-y := vt.o dma.o diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c new file mode 100644 index 0000000..c070141 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -0,0 +1,184 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include +#include +#include + +#include "dma.h" + +#define BAD_DMA_ADDRESS ((u64)0) + +/* + * The following functions implement driver specific replacements + * for the ib_dma_*() functions. + * + * These functions return kernel virtual addresses instead of + * device bus addresses since the driver uses the CPU to copy + * data instead of using hardware DMA. + */ + +static int rvt_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == BAD_DMA_ADDRESS; +} + +static u64 rvt_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + if (WARN_ON(!valid_dma_direction(direction))) + return BAD_DMA_ADDRESS; + + return (u64)cpu_addr; +} + +static void rvt_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is a stub, nothing to be done here */ +} + +static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + if (WARN_ON(!valid_dma_direction(direction))) + return BAD_DMA_ADDRESS; + + if (offset + size > PAGE_SIZE) + return BAD_DMA_ADDRESS; + + addr = (u64)page_address(page); + if (addr) + addr += offset; + + return addr; +} + +static void rvt_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is a stub, nothing to be done here */ +} + +static int rvt_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + u64 addr; + int i; + int ret = nents; + + if (WARN_ON(!valid_dma_direction(direction))) + return 0; + + for_each_sg(sgl, sg, nents, i) { + addr = (u64)page_address(sg_page(sg)); + if (!addr) { + ret = 0; + break; + } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif + } + return ret; +} + +static void rvt_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + /* This is a stub, nothing to be done here */ +} + +static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void rvt_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ +} + +static void *rvt_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + + p = alloc_pages(flag, get_order(size)); + if (p) + addr = page_address(p); + if (dma_handle) + *dma_handle = (u64)addr; + return addr; +} + +static void rvt_dma_free_coherent(struct ib_device *dev, size_t size, + void 
*cpu_addr, u64 dma_handle) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = { + .mapping_error = rvt_mapping_error, + .map_single = rvt_dma_map_single, + .unmap_single = rvt_dma_unmap_single, + .map_page = rvt_dma_map_page, + .unmap_page = rvt_dma_unmap_page, + .map_sg = rvt_map_sg, + .unmap_sg = rvt_unmap_sg, + .sync_single_for_cpu = rvt_sync_single_for_cpu, + .sync_single_for_device = rvt_sync_single_for_device, + .alloc_coherent = rvt_dma_alloc_coherent, + .free_coherent = rvt_dma_free_coherent +}; diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h new file mode 100644 index 0000000..a80cc35 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/dma.h @@ -0,0 +1,53 @@ +#ifndef DEF_RDMAVTDMA_H +#define DEF_RDMAVTDMA_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +extern struct ib_dma_mapping_ops rvt_default_dma_mapping_ops; + +#endif /* DEF_RDMAVTDMA_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index aa325db..d82fdda 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -69,6 +69,16 @@ int rvt_register_device(struct rvt_dev_info *rdi) if (!rdi) return -EINVAL; + /* + * Drivers have the option to override anything in the ibdev that they + * want to specifically handle. 
VT needs to check for things it supports + * and if the driver wants to handle that functionality let it. We may + * come up with a better mechanism that simplifies the code at some + * point. + */ + rdi->ibdev.dma_ops = + rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; + return ib_register_device(&rdi->ibdev, rdi->port_callback); } EXPORT_SYMBOL(rvt_register_device); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 0a39448..cfe59a7 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -49,5 +49,6 @@ */ #include +#include "dma.h" #endif /* DEF_RDMAVT_H */ -- cgit v0.10.2 From 8afd32eb58b6885fc3e268c69b1b1b627aa2afaf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:51:48 -0800 Subject: IB/rdmavt: Add protection domain to rdmavt. Add datastructure for and allocation/deallocation of protection domains for RDMAVT. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 134d2d0..c6751bb 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,5 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o dma.o +rdmavt-y := vt.o dma.o pd.o + diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c new file mode 100644 index 0000000..e6e153f --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -0,0 +1,103 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include "pd.h" + +struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct rvt_dev_info *dev = ib_to_rvt(ibdev); + struct rvt_pd *pd; + struct ib_pd *ret; + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + /* + * While we could continue allocating protecetion domains, being + * constrained only by system resources. The IBTA spec defines that + * there is a max_pd limit that can be set and we need to check for + * that. + */ + + spin_lock(&dev->n_pds_lock); + if (dev->n_pds_allocated == dev->dparms.max_pds) { + spin_unlock(&dev->n_pds_lock); + kfree(pd); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + dev->n_pds_allocated++; + spin_unlock(&dev->n_pds_lock); + + /* ib_alloc_pd() will initialize pd->ibpd. */ + pd->user = udata ? 1 : 0; + + ret = &pd->ibpd; + +bail: + return ret; +} + +int rvt_dealloc_pd(struct ib_pd *ibpd) +{ + struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); + struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); + + spin_lock(&dev->n_pds_lock); + dev->n_pds_allocated--; + spin_unlock(&dev->n_pds_lock); + + kfree(pd); + + return 0; +} diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h new file mode 100644 index 0000000..56d75e6 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -0,0 +1,58 @@ +#ifndef DEF_RDMAVTPD_H +#define DEF_RDMAVTPD_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int rvt_dealloc_pd(struct ib_pd *ibpd); + +#endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index d82fdda..b65cde4 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -76,9 +76,21 @@ int rvt_register_device(struct rvt_dev_info *rdi) * come up with a better mechanism that simplifies the code at some * point. */ + + /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; + /* Protection Domain */ + rdi->ibdev.alloc_pd = + rdi->ibdev.alloc_pd ? : rvt_alloc_pd; + rdi->ibdev.dealloc_pd = + rdi->ibdev.dealloc_pd ? : rvt_dealloc_pd; + + spin_lock_init(&rdi->n_pds_lock); + rdi->n_pds_allocated = 0; + + /* We are now good to announce we exist */ return ib_register_device(&rdi->ibdev, rdi->port_callback); } EXPORT_SYMBOL(rvt_register_device); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index cfe59a7..ba2f806 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -50,5 +50,6 @@ #include #include "dma.h" +#include "pd.h" #endif /* DEF_RDMAVT_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 0438bf2..6bf5fd4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -54,16 +54,42 @@ */ #include "ib_verbs.h" + +/* + * Things that are driver specific, module parameters in hfi1 and qib + */ +struct rvt_driver_params { + int max_pds; +}; + +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + struct rvt_dev_info { struct ib_device ibdev; + + /* Driver specific */ + struct rvt_driver_params dparms; int (*port_callback)(struct ib_device *, u8, struct kobject *); - /* - * TODO: - * need to reflect module parameters that may vary by dev - */ + /* Internal use */ + int n_pds_allocated; + spinlock_t n_pds_lock; /* Protect pd allocated count */ }; +static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct rvt_pd, ibpd); +} + +static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) +{ + return container_of(ibdev, struct rvt_dev_info, ibdev); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); -- cgit v0.10.2 From b1070a7a4d304e680eb6c1158d76645cf5a923f1 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:52:19 -0800 Subject: IB/rdmavt: Add ib core device attributes to rvt driver params list Instead of trying to handle each parameter separately, add 
ib_device_attr to rvt_driver_params. This means drivers will fill this in and pass to the rvt registration function. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index e6e153f..f8dba88 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -69,7 +69,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, */ spin_lock(&dev->n_pds_lock); - if (dev->n_pds_allocated == dev->dparms.max_pds) { + if (dev->n_pds_allocated == dev->dparms.props.max_pd) { spin_unlock(&dev->n_pds_lock); kfree(pd); ret = ERR_PTR(-ENOMEM); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 6bf5fd4..2990e03 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -59,7 +59,45 @@ * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { - int max_pds; + /* + * driver required fields: + * node_guid + * phys_port_cnt + * dma_device + * owner + * driver optional fields (rvt will provide generic value if blank): + * name + * node_desc + * rvt fields, driver value ignored: + * uverbs_abi_ver + * node_type + * num_comp_vectors + * uverbs_cmd_mask + */ + struct ib_device_attr props; + + /* + * Drivers will need to support a number of notifications to rvt in + * accordance with certain events. This structure should contain a mask + * of the supported events. Such events that the rvt may need to know + * about include: + * port errors + * port active + * lid change + * sm change + * client reregister + * pkey change + * + * There may also be other events that the rvt layers needs to know + * about this is not an exhaustive list. Some events though rvt does not + * need to rely on the driver for such as completion queue error. + */ + int rvt_signal_supported; + + /* + * Anything driver specific that is not covered by props + * For instance special module parameters. Goes here. + */ }; /* Protection domain */ @@ -69,10 +107,25 @@ struct rvt_pd { }; struct rvt_dev_info { + /* + * Prior to calling for registration the driver will be responsible for + * allocating space for this structure. + * + * The driver will also be responsible for filling in certain members of + * dparms.props + */ + struct ib_device ibdev; - /* Driver specific */ + /* Driver specific properties */ struct rvt_driver_params dparms; + + /* + * The work to create port files in /sys/class Infiniband is different + * depending on the driver. This should not be extracted away and + * instead drivers are responsible for setting the correct callback for + * this. + */ int (*port_callback)(struct ib_device *, u8, struct kobject *); /* Internal use */ -- cgit v0.10.2 From 4997870a0935b923d2ba67293fca0b8c05f74c1e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:52:40 -0800 Subject: IB/rdmavt: Macroize override checks during driver registration Add a macro to cut down on writing the same lines over and over again for what will be a large number of functions that will be supported. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index b65cde4..8bd25c3 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -64,29 +64,25 @@ static void rvt_cleanup(void) } module_exit(rvt_cleanup); +/* + * Check driver override. 
If driver passes a value use it, otherwise we use our + * own value. + */ +#define CHECK_DRIVER_OVERRIDE(rdi, x) \ + rdi->ibdev.x = rdi->ibdev.x ? : rvt_ ##x + int rvt_register_device(struct rvt_dev_info *rdi) { if (!rdi) return -EINVAL; - /* - * Drivers have the option to override anything in the ibdev that they - * want to specifically handle. VT needs to check for things it supports - * and if the driver wants to handle that functionality let it. We may - * come up with a better mechanism that simplifies the code at some - * point. - */ - /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; /* Protection Domain */ - rdi->ibdev.alloc_pd = - rdi->ibdev.alloc_pd ? : rvt_alloc_pd; - rdi->ibdev.dealloc_pd = - rdi->ibdev.dealloc_pd ? : rvt_dealloc_pd; - + CHECK_DRIVER_OVERRIDE(rdi, alloc_pd); + CHECK_DRIVER_OVERRIDE(rdi, dealloc_pd); spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; -- cgit v0.10.2 From 19ef1edd7f75f805dc8fa60e0c61773f9cb6d50b Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:53:05 -0800 Subject: IB/rdmavt: Add query and modify device stubs Adds the stubs which will handle the query and modify device functions. At this time the only intention is to support changing the node desc and the guid via these calls. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 8bd25c3..db14646 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -64,6 +64,33 @@ static void rvt_cleanup(void) } module_exit(rvt_cleanup); +static int rvt_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *uhw) +{ + /* + * Return rvt_dev_info.props contents + */ + return -EOPNOTSUPP; +} + +static int rvt_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify) +{ + /* + * Change dev props. Planned support is for node desc change and sys + * guid change only. This matches hfi1 and qib behavior. Other drivers + * that support existing modifications will need to add their support. + */ + + /* + * VT-DRIVER-API: node_desc_change() + * VT-DRIVER-API: sys_guid_change() + */ + return -EOPNOTSUPP; +} + /* * Check driver override. If driver passes a value use it, otherwise we use our * own value. @@ -76,6 +103,10 @@ int rvt_register_device(struct rvt_dev_info *rdi) if (!rdi) return -EINVAL; + /* Dev Ops */ + CHECK_DRIVER_OVERRIDE(rdi, query_device); + CHECK_DRIVER_OVERRIDE(rdi, modify_device); + /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; -- cgit v0.10.2 From 765525c11ef48e63717891a6636f610620ab4e7c Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:54:07 -0800 Subject: IB/rdmavt: Add query and modify port stubs This patch adds the query and modify port stubs. The query will mostly entail the driver returning everything in the ib_port_attr which will get handed back to the verbs layer. The modify will need some API helpers in the driver. The send_trap and post_mad_send are still issues to address. 
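For illustration, the override rule these registrations rely on looks like this from a driver's point of view. The fragment is purely hypothetical (my_query_port and my_driver_dev are made-up names, and my_driver_dev is assumed to embed a struct rvt_dev_info called rdi): a callback the driver fills in before registration survives CHECK_DRIVER_OVERRIDE(), while anything left NULL falls back to the matching rvt_* stub.

  /* Hypothetical driver-side code, only to show the fallback rule */
  static int my_query_port(struct ib_device *ibdev, u8 port,
                           struct ib_port_attr *props)
  {
          /* the driver fills in props from its own hardware state */
          return 0;
  }

  static int my_register(struct my_driver_dev *dd)
  {
          /* driver-provided callback is kept by CHECK_DRIVER_OVERRIDE() */
          dd->rdi.ibdev.query_port = my_query_port;
          /* modify_port left NULL, so the rvt_modify_port stub is used */
          return rvt_register_device(&dd->rdi);
  }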
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index db14646..5ac241c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -91,6 +91,52 @@ static int rvt_modify_device(struct ib_device *device, return -EOPNOTSUPP; } +/** + * rvt_query_port: Passes the query port call to the driver + * @ibdev: Verbs IB dev + * @port: port number + * @props: structure to hold returned properties + * + * Returns 0 on success + */ +static int rvt_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + /* + * VT-DRIVER-API: query_port_state() + * driver returns pretty much everything in ib_port_attr + */ + return -EOPNOTSUPP; +} + +/** + * rvt_modify_port + * @ibdev: Verbs IB dev + * @port: Port number + * @port_modify_mask: How to change the port + * @props: Structure to fill in + * + * Returns 0 on success + */ +static int rvt_modify_port(struct ib_device *ibdev, u8 port, + int port_modify_mask, struct ib_port_modify *props) +{ + /* + * VT-DRIVER-API: set_link_state() + * driver will set the link state using the IB enumeration + * + * VT-DRIVER-API: clear_qkey_violations() + * clears driver private qkey counter + * + * VT-DRIVER-API: get_lid() + * driver needs to return the LID + * + * TBD: send_trap() and post_mad_send() need examined to see where they + * fit in. + */ + return -EOPNOTSUPP; +} + /* * Check driver override. If driver passes a value use it, otherwise we use our * own value. @@ -106,6 +152,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Dev Ops */ CHECK_DRIVER_OVERRIDE(rdi, query_device); CHECK_DRIVER_OVERRIDE(rdi, modify_device); + CHECK_DRIVER_OVERRIDE(rdi, query_port); + CHECK_DRIVER_OVERRIDE(rdi, modify_port); /* DMA Operations */ rdi->ibdev.dma_ops = -- cgit v0.10.2 From 30588643f95e1bb1239e2568de7a653722832a5e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:54:16 -0800 Subject: IB/rdmavt: Add pkey query stub The pkey table will reside in the rvt structure but it will be modified only when the driver requests then rvt will simply read the value to return in the query. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 5ac241c..fc5372d 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -137,6 +137,26 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port, return -EOPNOTSUPP; } +/** + * rvt_query_pkey - Return a pkey from the table at a given index + * @ibdev: Verbs IB dev + * @port: Port number + * @intex: Index into pkey table + * + * Returns 0 on failure pkey otherwise + */ +static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + /* + * Driver will be responsible for keeping rvt_dev_info.pkey_table up to + * date. This function will just return that value. There is no need to + * lock, if a stale value is read and sent to the user so be it there is + * no way to protect against that anyway. + */ + return 0; +} + /* * Check driver override. If driver passes a value use it, otherwise we use our * own value. 
@@ -154,6 +174,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_device); CHECK_DRIVER_OVERRIDE(rdi, query_port); CHECK_DRIVER_OVERRIDE(rdi, modify_port); + CHECK_DRIVER_OVERRIDE(rdi, query_pkey); /* DMA Operations */ rdi->ibdev.dma_ops = diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2990e03..bf072a4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -114,12 +114,13 @@ struct rvt_dev_info { * The driver will also be responsible for filling in certain members of * dparms.props */ - struct ib_device ibdev; /* Driver specific properties */ struct rvt_driver_params dparms; + /* PKey Table goes here */ + /* * The work to create port files in /sys/class Infiniband is different * depending on the driver. This should not be extracted away and -- cgit v0.10.2 From 2d092e11744695c30b76fac070c80bb7e1ac9325 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:54:50 -0800 Subject: IB/rdmavt: Add query gid stub The handler for query gid operates along the same lines as the query pkey handler. The driver will take care to keep the guid table updated. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index fc5372d..e95f197 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -157,6 +157,27 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, return 0; } +/** + * rvt_query_gid - Return a gid from the table + * @ibdev: Verbs IB dev + * @port: Port number + * @index: = Index in table + * @gid: Gid to return + * + * Returns 0 on success + */ +static int rvt_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + /* + * Driver is responsible for updating the guid table. Which will be used + * to craft the return value. This will work similar to how query_pkey() + * is being done. + */ + + return -EOPNOTSUPP; +} + /* * Check driver override. If driver passes a value use it, otherwise we use our * own value. @@ -175,6 +196,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, query_port); CHECK_DRIVER_OVERRIDE(rdi, modify_port); CHECK_DRIVER_OVERRIDE(rdi, query_pkey); + CHECK_DRIVER_OVERRIDE(rdi, query_gid); /* DMA Operations */ rdi->ibdev.dma_ops = -- cgit v0.10.2 From c4ed7d8bb08061218a3bff5ad80a9537cea232f2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:55:39 -0800 Subject: IB/rdmavt: Alloc and dealloc ucontexts Add the stubs to allocate and deallocate user contexts. This will be handled completely by rvt. 
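Since rvt is meant to own this path entirely, the eventual implementation will probably look close to the sketch below. struct rvt_ucontext is an assumed wrapper that this patch does not add; the stubs in the diff itself still return -EOPNOTSUPP.

  /* Sketch only; struct rvt_ucontext is an assumption, not in this patch */
  struct rvt_ucontext {
          struct ib_ucontext ibucontext;
  };

  static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
                                                struct ib_udata *udata)
  {
          struct rvt_ucontext *context;

          /* needs <linux/slab.h> for kzalloc()/kfree() */
          context = kzalloc(sizeof(*context), GFP_KERNEL);
          if (!context)
                  return ERR_PTR(-ENOMEM);
          return &context->ibucontext;
  }

  static int rvt_dealloc_ucontext(struct ib_ucontext *context)
  {
          kfree(container_of(context, struct rvt_ucontext, ibucontext));
          return 0;
  }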
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e95f197..cd19429 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -178,6 +178,26 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port, return -EOPNOTSUPP; } +/** + * rvt_alloc_ucontext - Allocate a user context + * @ibdev: Vers IB dev + * @data: User data allocated + */ +static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +/** + *rvt_dealloc_ucontext - Free a user context + *@context - Free this + */ +static int rvt_dealloc_ucontext(struct ib_ucontext *context) +{ + return -EOPNOTSUPP; +} + /* * Check driver override. If driver passes a value use it, otherwise we use our * own value. @@ -197,6 +217,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_port); CHECK_DRIVER_OVERRIDE(rdi, query_pkey); CHECK_DRIVER_OVERRIDE(rdi, query_gid); + CHECK_DRIVER_OVERRIDE(rdi, alloc_ucontext); + CHECK_DRIVER_OVERRIDE(rdi, dealloc_ucontext); /* DMA Operations */ rdi->ibdev.dma_ops = -- cgit v0.10.2 From b518d3e69e7df49bf0bc4efe447338917ef41843 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:56:15 -0800 Subject: IB/rdmavt: Add queue pair function stubs Adds the stubs for create, modify, destroy and query functions for queue pairs. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index c6751bb..9a9a095 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,5 +7,5 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o dma.o pd.o +rdmavt-y := vt.o dma.o pd.o qp.o diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c new file mode 100644 index 0000000..a59f28d --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -0,0 +1,120 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "qp.h" + +/** + * rvt_create_qp - create a queue pair for a device + * @ibpd: the protection domain who's device we create the queue pair for + * @init_attr: the attributes of the queue pair + * @udata: user data for libibverbs.so + * + * Returns the queue pair on success, otherwise returns an errno. + * + * Called by the ib_create_qp() core verbs function. + */ +struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + /* + * Queue pair creation is mostly an rvt issue. However, drivers have + * their own unique idea of what queue pare numbers mean. For instance + * there is a reserved range for PSM. + * + * VI-DRIVER-API: make_qpn() + * Returns a valid QPN for verbs to use + */ + return ERR_PTR(-EOPNOTSUPP); +} + +/** + * qib_modify_qp - modify the attributes of a queue pair + * @ibqp: the queue pair who's attributes we're modifying + * @attr: the new attributes + * @attr_mask: the mask of attributes to modify + * @udata: user data for libibverbs.so + * + * Returns 0 on success, otherwise returns an errno. + */ +int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + /* + * VT-DRIVER-API: qp_mtu() + * OPA devices have a per VL MTU the driver has a mapping of IB SL to SC + * to VL and the mapping table of MTUs per VL. This is not something + * that IB has and should not live in the rvt. + */ + return -EOPNOTSUPP; +} + +/** + * rvt_destroy_qp - destroy a queue pair + * @ibqp: the queue pair to destroy + * + * Returns 0 on success. + * + * Note that this can be called while the QP is actively sending or + * receiving! + */ +int rvt_destroy_qp(struct ib_qp *ibqp) +{ + /* + * VT-DRIVER-API: qp_flush() + * Driver provies a mechanism to flush and wait for that flush to + * finish. + */ + + return -EOPNOTSUPP; +} + +int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h new file mode 100644 index 0000000..43acba0 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -0,0 +1,67 @@ +#ifndef DEF_RVTQP_H +#define DEF_RVTQP_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. 
+ * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +struct rvt_qp { + struct ib_qp *ibqp; + /* Other stuff */ +}; + +struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int rvt_destroy_qp(struct ib_qp *ibqp); +int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr); + +#endif /* DEF_RVTQP_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index cd19429..07b0ada 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -220,6 +220,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, alloc_ucontext); CHECK_DRIVER_OVERRIDE(rdi, dealloc_ucontext); + /* Queue Pairs */ + CHECK_DRIVER_OVERRIDE(rdi, create_qp); + CHECK_DRIVER_OVERRIDE(rdi, modify_qp); + CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); + CHECK_DRIVER_OVERRIDE(rdi, query_qp); + /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? 
: &rvt_default_dma_mapping_ops; diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index ba2f806..8758f25 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -51,5 +51,6 @@ #include #include "dma.h" #include "pd.h" +#include "qp.h" #endif /* DEF_RDMAVT_H */ -- cgit v0.10.2 From 4c1e497286debf733521b24c20f350a69a77b3c8 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:56:41 -0800 Subject: IB/rdmavt: Add address handle stubs Adds the stubs for create, destroy, modify, and query of the address handle. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 9a9a095..628c684 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,5 +7,4 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o dma.o pd.o qp.o - +rdmavt-y := vt.o ah.o dma.o pd.o qp.o diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c new file mode 100644 index 0000000..d368955 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -0,0 +1,76 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "ah.h" + +/** + * rvt_create_ah - create an address handle + * @pd: the protection domain + * @ah_attr: the attributes of the AH + * + * This may be called from interrupt context. + */ +struct ib_ah *rvt_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +int rvt_destroy_ah(struct ib_ah *ibah) +{ + return -EOPNOTSUPP; +} + +int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + return -EOPNOTSUPP; +} + +int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h new file mode 100644 index 0000000..8cd7ea7 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -0,0 +1,59 @@ +#ifndef DEF_RVTAH_H +#define DEF_RVTAH_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +struct ib_ah *rvt_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); +int rvt_destroy_ah(struct ib_ah *ibah); +int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); +int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); + +#endif /* DEF_RVTAH_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 07b0ada..6243562 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -226,6 +226,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); CHECK_DRIVER_OVERRIDE(rdi, query_qp); + /* Address Handle */ + CHECK_DRIVER_OVERRIDE(rdi, create_ah); + CHECK_DRIVER_OVERRIDE(rdi, destroy_ah); + CHECK_DRIVER_OVERRIDE(rdi, modify_ah); + CHECK_DRIVER_OVERRIDE(rdi, query_ah); + /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 8758f25..96072cd 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -52,5 +52,6 @@ #include "dma.h" #include "pd.h" #include "qp.h" +#include "ah.h" #endif /* DEF_RDMAVT_H */ -- cgit v0.10.2 From 2a055eb7aa2ad168ec7c616a183e385266b6bf76 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:57:21 -0800 Subject: IB/rdmavt: Add memory region stubs Adds the function stubs for allocating, and registering memory regions, as well as deregistering them. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 628c684..084ee6a 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,4 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o ah.o dma.o pd.o qp.o +rdmavt-y := vt.o ah.o dma.o mr.o pd.o qp.o diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c new file mode 100644 index 0000000..c672043 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -0,0 +1,165 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include "mr.h" + +/** + * rvt_get_dma_mr - get a DMA memory region + * @pd: protection domain for this memory region + * @acc: access flags + * + * Returns the memory region on success, otherwise returns an errno. + * Note that all DMA addresses should be created via the + * struct ib_dma_mapping_ops functions (see dma.c). + */ +struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) +{ + /* + * Alloc mr and init it. + * Alloc lkey. + */ + return ERR_PTR(-EOPNOTSUPP); +} + +/** + * rvt_reg_user_mr - register a userspace memory region + * @pd: protection domain for this memory region + * @start: starting userspace address + * @length: length of region to register + * @mr_access_flags: access flags for this memory region + * @udata: unused by the driver + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +/** + * rvt_dereg_mr - unregister and free a memory region + * @ibmr: the memory region to free + * + * Returns 0 on success. + * + * Note that this is called to free MRs created by rvt_get_dma_mr() + * or rvt_reg_user_mr(). + */ +int rvt_dereg_mr(struct ib_mr *ibmr) +{ + return -EOPNOTSUPP; +} + +/** + * rvt_alloc_mr - Allocate a memory region usable with the + * @pd: protection domain for this memory region + * @mr_type: mem region type + * @max_num_sg: Max number of segments allowed + * + * Return the memory region on success, otherwise return an errno. + */ +struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +/** + * rvt_alloc_fmr - allocate a fast memory region + * @pd: the protection domain for this memory region + * @mr_access_flags: access flags for this memory region + * @fmr_attr: fast memory region attributes + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +/** + * rvt_map_phys_fmr - set up a fast memory region + * @ibmfr: the fast memory region to set up + * @page_list: the list of pages to associate with the fast memory region + * @list_len: the number of pages to associate with the fast memory region + * @iova: the virtual address of the start of the fast memory region + * + * This may be called from interrupt context. 
+ */ + +int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova) +{ + return -EOPNOTSUPP; +} + +/** + * rvt_unmap_fmr - unmap fast memory regions + * @fmr_list: the list of fast memory regions to unmap + * + * Returns 0 on success. + */ +int rvt_unmap_fmr(struct list_head *fmr_list) +{ + return -EOPNOTSUPP; +} + +/** + * rvt_dealloc_fmr - deallocate a fast memory region + * @ibfmr: the fast memory region to deallocate + * + * Returns 0 on success. + */ +int rvt_dealloc_fmr(struct ib_fmr *ibfmr) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h new file mode 100644 index 0000000..f19e9da --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -0,0 +1,69 @@ +#ifndef DEF_RVTMR_H +#define DEF_RVTMR_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +/* Mem Regions */ +struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata); +int rvt_dereg_mr(struct ib_mr *ibmr); +struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); +struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr); +int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova); +int rvt_unmap_fmr(struct list_head *fmr_list); +int rvt_dealloc_fmr(struct ib_fmr *ibfmr); + +#endif /* DEF_RVTMR_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 6243562..004ca14 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -232,6 +232,16 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_ah); CHECK_DRIVER_OVERRIDE(rdi, query_ah); + /* Mem Region */ + CHECK_DRIVER_OVERRIDE(rdi, get_dma_mr); + CHECK_DRIVER_OVERRIDE(rdi, reg_user_mr); + CHECK_DRIVER_OVERRIDE(rdi, dereg_mr); + CHECK_DRIVER_OVERRIDE(rdi, alloc_mr); + CHECK_DRIVER_OVERRIDE(rdi, alloc_fmr); + CHECK_DRIVER_OVERRIDE(rdi, map_phys_fmr); + CHECK_DRIVER_OVERRIDE(rdi, unmap_fmr); + CHECK_DRIVER_OVERRIDE(rdi, dealloc_fmr); + /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 96072cd..78b9549 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -53,5 +53,6 @@ #include "pd.h" #include "qp.h" #include "ah.h" +#include "mr.h" #endif /* DEF_RDMAVT_H */ -- cgit v0.10.2 From aad9158bda02875cceb2408d7cc2675de2e2829a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:57:58 -0800 Subject: IB/rdmavt: Add SRQ stubs Adds the stubs for create, modify, query, and destory for shared request queues. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 084ee6a..204be84 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,4 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o ah.o dma.o mr.o pd.o qp.o +rdmavt-y := vt.o ah.o dma.o mr.o pd.o qp.o srq.o diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c new file mode 100644 index 0000000..bbb623a --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -0,0 +1,86 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "srq.h" + +/** + * rvt_create_srq - create a shared receive queue + * @ibpd: the protection domain of the SRQ to create + * @srq_init_attr: the attributes of the SRQ + * @udata: data from libibverbs when creating a user SRQ + */ +struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +/** + * rvt_modify_srq - modify a shared receive queue + * @ibsrq: the SRQ to modify + * @attr: the new attributes of the SRQ + * @attr_mask: indicates which attributes to modify + * @udata: user data for libibverbs.so + */ +int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata) +{ + return -EOPNOTSUPP; +} + +int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + return -EOPNOTSUPP; +} + +int rvt_destroy_srq(struct ib_srq *ibsrq) +{ + return -EOPNOTSUPP; +} + diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h new file mode 100644 index 0000000..0c3c5a7 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -0,0 +1,61 @@ +#ifndef DEF_RVTSRQ_H +#define DEF_RVTSRQ_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); +int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata); +int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); +int rvt_destroy_srq(struct ib_srq *ibsrq); + +#endif /* DEF_RVTSRQ_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 004ca14..9b318e9 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -232,6 +232,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_ah); CHECK_DRIVER_OVERRIDE(rdi, query_ah); + /* Shared Receive Queue */ + CHECK_DRIVER_OVERRIDE(rdi, create_srq); + CHECK_DRIVER_OVERRIDE(rdi, modify_srq); + CHECK_DRIVER_OVERRIDE(rdi, destroy_srq); + CHECK_DRIVER_OVERRIDE(rdi, query_srq); + /* Mem Region */ CHECK_DRIVER_OVERRIDE(rdi, get_dma_mr); CHECK_DRIVER_OVERRIDE(rdi, reg_user_mr); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 78b9549..5019534 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -54,5 +54,6 @@ #include "qp.h" #include "ah.h" #include "mr.h" +#include "srq.h" #endif /* DEF_RDMAVT_H */ -- cgit v0.10.2 From 9fa2517116b1841e6a6402fe5d2d51acc31164d2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:58:23 -0800 Subject: IB/rdmavt: Add multicast stubs Adds the function stubs for attach and detach multicast. 
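For context, these stubs sit at the end of the usual verbs dispatch path. The fragment below is illustrative only (qp, mgid and mlid are assumed to come from the calling ULP); ib_attach_mcast() ends up in rvt_attach_mcast() unless the driver overrode the callback, so callers currently just see -EOPNOTSUPP.

  /* Illustrative ULP-side fragment; not part of this patch */
  static int example_mcast_join(struct ib_qp *qp, union ib_gid *mgid, u16 mlid)
  {
          int ret;

          /* dispatches to the attach_mcast callback, i.e. rvt_attach_mcast() */
          ret = ib_attach_mcast(qp, mgid, mlid);
          if (ret)
                  pr_debug("mcast attach failed: %d\n", ret);
          return ret;
  }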
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 204be84..d2af114 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,4 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o ah.o dma.o mr.o pd.o qp.o srq.o +rdmavt-y := vt.o ah.o dma.o mcast.o mr.o pd.o qp.o srq.o diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c new file mode 100644 index 0000000..5a78dc7 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -0,0 +1,58 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "mcast.h" + +int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return -EOPNOTSUPP; +} + +int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h new file mode 100644 index 0000000..21647c3 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/mcast.h @@ -0,0 +1,56 @@ +#ifndef DEF_RVTMCAST_H +#define DEF_RVTMCAST_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. 
When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); +int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +#endif /* DEF_RVTMCAST_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 9b318e9..e69bc8a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -238,6 +238,10 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, destroy_srq); CHECK_DRIVER_OVERRIDE(rdi, query_srq); + /* Multicast */ + CHECK_DRIVER_OVERRIDE(rdi, attach_mcast); + CHECK_DRIVER_OVERRIDE(rdi, detach_mcast); + /* Mem Region */ CHECK_DRIVER_OVERRIDE(rdi, get_dma_mr); CHECK_DRIVER_OVERRIDE(rdi, reg_user_mr); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 5019534..02e1212 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -55,5 +55,6 @@ #include "ah.h" #include "mr.h" #include "srq.h" +#include "mcast.h" #endif /* DEF_RDMAVT_H */ -- cgit v0.10.2 From 3dd1faf89b418f036ff664edbf04d4e266e2ceaa Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:58:37 -0800 Subject: IB/rdmavt: Add process MAD stub This adds the stub for process mad. More study is needed to determine the final MAD interaction between the driver and rvt. 
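One plausible direction, sketched purely as an assumption about where the interface could end up (rvt_process_subn() and rvt_process_perf() are hypothetical helpers that do not exist anywhere yet): dispatch on the management class and report success for classes rvt has no interest in, so the ib_mad module knows there is nothing further to do.

  /* Sketch of a possible class dispatch; the helpers used here do not exist */
  static int rvt_process_mad_sketch(struct ib_device *ibdev, int mad_flags,
                                    u8 port, const struct ib_mad_hdr *in,
                                    struct ib_mad_hdr *out)
  {
          switch (in->mgmt_class) {
          case IB_MGMT_CLASS_SUBN_LID_ROUTED:
          case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
                  return rvt_process_subn(ibdev, mad_flags, port, in, out);
          case IB_MGMT_CLASS_PERF_MGMT:
                  return rvt_process_perf(ibdev, port, in, out);
          default:
                  /* not a class rvt handles; nothing more for ib_mad to do */
                  return IB_MAD_RESULT_SUCCESS;
          }
  }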
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index d2af114..fe65410 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,4 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o ah.o dma.o mcast.o mr.o pd.o qp.o srq.o +rdmavt-y := vt.o ah.o dma.o mad.o mcast.o mr.o pd.o qp.o srq.o diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c new file mode 100644 index 0000000..eef7029 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -0,0 +1,85 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "mad.h" + +/** + * rvt_process_mad - process an incoming MAD packet + * @ibdev: the infiniband device this packet came in on + * @mad_flags: MAD flags + * @port: the port number this packet came in on + * @in_wc: the work completion entry for this packet + * @in_grh: the global route header for this packet + * @in_mad: the incoming MAD + * @out_mad: any outgoing MAD reply + * + * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not + * interested in processing. 
+ * + * Note that the verbs framework has already done the MAD sanity checks, + * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE + * MADs. + * + * This is called by the ib_mad module. + */ +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) +{ + /* + * Drivers will need to provide a number of things. For exmaple counters + * will need to be maintained by the driver but shoud live in the rvt + * structure. More study will be needed to finalize the interface + * between drivers and rvt for mad packets. + * + *VT-DRIVER-API: ???? + * + */ + return IB_MAD_RESULT_FAILURE; +} diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h new file mode 100644 index 0000000..ee740e9 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -0,0 +1,59 @@ +#ifndef DEF_RVTMAD_H +#define DEF_RVTMAD_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); + +#endif /* DEF_RVTMAD_H */ -- cgit v0.10.2 From dc21752e2c930ae32f11affb2e1d7a6f8a44a16a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:59:04 -0800 Subject: IB/rdmavt: Add mmap stub Adds the stub for the mmap verbs call. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index fe65410..6f530d1 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,4 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o ah.o dma.o mad.o mcast.o mr.o pd.o qp.o srq.o +rdmavt-y := vt.o ah.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c new file mode 100644 index 0000000..d09f3a05 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -0,0 +1,60 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include "mmap.h" + +/** + * rvt_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h new file mode 100644 index 0000000..94f6377 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -0,0 +1,55 @@ +#ifndef DEF_RDMAVTMMAP_H +#define DEF_RDMAVTMMAP_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +#endif /* DEF_RDMAVTMMAP_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e69bc8a..41cfd84 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -251,6 +251,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, map_phys_fmr); CHECK_DRIVER_OVERRIDE(rdi, unmap_fmr); CHECK_DRIVER_OVERRIDE(rdi, dealloc_fmr); + CHECK_DRIVER_OVERRIDE(rdi, mmap); /* DMA Operations */ rdi->ibdev.dma_ops = diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 02e1212..6a5f5bf 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -56,5 +56,6 @@ #include "mr.h" #include "srq.h" #include "mcast.h" +#include "mmap.h" #endif /* DEF_RDMAVT_H */ -- cgit v0.10.2 From e6a8818a354db4d5a13b42d9fbc0f11ed74058e8 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:59:38 -0800 Subject: IB/rdmavt: Add get port immutable stub This adds the get port immutable verbs call. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 41cfd84..a122ee8 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -198,6 +198,12 @@ static int rvt_dealloc_ucontext(struct ib_ucontext *context) return -EOPNOTSUPP; } +static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + return -EOPNOTSUPP; +} + /* * Check driver override. If driver passes a value use it, otherwise we use our * own value. @@ -219,6 +225,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, query_gid); CHECK_DRIVER_OVERRIDE(rdi, alloc_ucontext); CHECK_DRIVER_OVERRIDE(rdi, dealloc_ucontext); + CHECK_DRIVER_OVERRIDE(rdi, get_port_immutable); /* Queue Pairs */ CHECK_DRIVER_OVERRIDE(rdi, create_qp); -- cgit v0.10.2 From cf16335a0ccf5adda3d4bad932a7e012891709c5 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:00:42 -0800 Subject: IB/rdmavt: Add completion queue function stubs Create stubs for completion queue creation, polling, resizing, calling for notification, and destroying. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 6f530d1..00f0188 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,4 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o ah.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o +rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c new file mode 100644 index 0000000..8d96194 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -0,0 +1,113 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "cq.h" + +/** + * rvt_create_cq - create a completion queue + * @ibdev: the device this completion queue is attached to + * @attr: creation attributes + * @context: unused by the QLogic_IB driver + * @udata: user data for libibverbs.so + * + * Returns a pointer to the completion queue or negative errno values + * for failure. + * + * Called by ib_create_cq() in the generic verbs code. + */ +struct ib_cq *rvt_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +/** + * rvt_destroy_cq - destroy a completion queue + * @ibcq: the completion queue to destroy. + * + * Returns 0 for success. + * + * Called by ib_destroy_cq() in the generic verbs code. + */ +int rvt_destroy_cq(struct ib_cq *ibcq) +{ + return -EOPNOTSUPP; +} + +int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) +{ + return -EOPNOTSUPP; +} + +/** + * rvt_resize_cq - change the size of the CQ + * @ibcq: the completion queue + * + * Returns 0 for success. + */ +int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) +{ + return -EOPNOTSUPP; +} + +/** + * rvt_poll_cq - poll for work completion entries + * @ibcq: the completion queue to poll + * @num_entries: the maximum number of entries to return + * @entry: pointer to array where work completions are placed + * + * Returns the number of completion entries polled. + * + * This may be called from interrupt context. Also called by ib_poll_cq() + * in the generic verbs code. 
+ */ +int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h new file mode 100644 index 0000000..63a517d --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -0,0 +1,62 @@ +#ifndef DEF_RVTCQ_H +#define DEF_RVTCQ_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +struct ib_cq *rvt_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); +int rvt_destroy_cq(struct ib_cq *ibcq); +int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); +int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); +int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); + +#endif /* DEF_RVTCQ_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index a122ee8..7fd879f 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -260,6 +260,13 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, dealloc_fmr); CHECK_DRIVER_OVERRIDE(rdi, mmap); + /* Completion queues */ + CHECK_DRIVER_OVERRIDE(rdi, create_cq); + CHECK_DRIVER_OVERRIDE(rdi, destroy_cq); + CHECK_DRIVER_OVERRIDE(rdi, poll_cq); + CHECK_DRIVER_OVERRIDE(rdi, req_notify_cq); + CHECK_DRIVER_OVERRIDE(rdi, resize_cq); + /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 6a5f5bf..fdb52a8 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -57,5 +57,6 @@ #include "srq.h" #include "mcast.h" #include "mmap.h" +#include "cq.h" #endif /* DEF_RDMAVT_H */ -- cgit v0.10.2 From 8cf4020b2ad0d19f74fed043b882da1b79f52566 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:01:17 -0800 Subject: IB/rdmavt: Add post send and recv stubs This adds the post sned and recv function stubs. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a59f28d..23a5f68 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -118,3 +118,65 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { return -EOPNOTSUPP; } + +/** + * rvt_post_receive - post a receive on a QP + * @ibqp: the QP to post the receive on + * @wr: the WR to post + * @bad_wr: the first bad WR is put here + * + * This may be called from interrupt context. + */ +int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + /* + * When a packet arrives the driver needs to call up to rvt to process + * the packet. The UD, RC, UC processing will be done in rvt, however + * the driver should be able to override this if it so choses. Perhaps a + * set of function pointers set up at registration time. + */ + + return -EOPNOTSUPP; +} + +/** + * rvt_post_send - post a send on a QP + * @ibqp: the QP to post the send on + * @wr: the list of work requests to post + * @bad_wr: the first bad WR is put here + * + * This may be called from interrupt context. + */ +int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + /* + * VT-DRIVER-API: do_send() + * Driver needs to have a do_send() call which is a single entry point + * to take an already formed packet and throw it out on the wire. Once + * the packet is sent the driver needs to make an upcall to rvt so the + * completion queue can be notified and/or any other outstanding + * work/book keeping can be finished. + * + * Note that there should also be a way for rvt to protect itself + * against hangs in the driver layer. 
If a send doesn't actually + * complete in a timely manor rvt needs to return an error event. + */ + + return -EOPNOTSUPP; +} + +/** + * rvt_post_srq_receive - post a receive on a shared receive queue + * @ibsrq: the SRQ to post the receive on + * @wr: the list of work requests to post + * @bad_wr: A pointer to the first WR to cause a problem is put here + * + * This may be called from interrupt context. + */ +int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 43acba0..10bc636 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -63,5 +63,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int rvt_destroy_qp(struct ib_qp *ibqp); int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); - +int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); #endif /* DEF_RVTQP_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7fd879f..367bc45 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -232,6 +232,9 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_qp); CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); CHECK_DRIVER_OVERRIDE(rdi, query_qp); + CHECK_DRIVER_OVERRIDE(rdi, post_send); + CHECK_DRIVER_OVERRIDE(rdi, post_recv); + CHECK_DRIVER_OVERRIDE(rdi, post_srq_recv); /* Address Handle */ CHECK_DRIVER_OVERRIDE(rdi, create_ah); -- cgit v0.10.2 From b92a7568037e2a28f61c3f79c2320431bb24dfab Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:01:42 -0800 Subject: IB/rdmavt: Move MR datastructures into rvt This patch adds the MR datastructures based on hfi1 into rvt. For now the data structures are defined in include/rdma/rdma_vt.h but once all MR functionality has been moved from the drivers into rvt these should move to rdmavt/mr.h Reviewed-by: Ira Weiny Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index bf072a4..f232e39 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -56,6 +56,56 @@ #include "ib_verbs.h" /* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page. 
*/ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct rvt_mregion __rcu **table; +}; + +/* End Memmory Region */ + +/* * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { @@ -119,6 +169,9 @@ struct rvt_dev_info { /* Driver specific properties */ struct rvt_driver_params dparms; + struct rvt_mregion __rcu *dma_mr; + struct rvt_lkey_table lkey_table; + /* PKey Table goes here */ /* -- cgit v0.10.2 From ca889e8ad3af9f1dfeb827356bc9839fb20f32be Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:41 -0800 Subject: IB/rdmavt: Add queue pair data structure to rdmavt Add queue pair data structure as well as supporting structures to rdmavt. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 10bc636..9c2999d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -50,11 +50,6 @@ #include -struct rvt_qp { - struct ib_qp *ibqp; - /* Other stuff */ -}; - struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index f232e39..9baa7f0 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -106,6 +106,239 @@ struct rvt_lkey_table { /* End Memmory Region */ /* + * Things needed for the Queue Pair definition. Like the MR stuff above the + * following should probably get moved to qp.h once drivers stop trying to make + * and manipulate thier own QPs. For the few instnaces where a driver may need + * to look into a queue pair there should be a pointer to a driver priavte data + * structure that they can look at. + */ + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. 
+ */ +struct rvt_swqe { + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct rvt_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct rvt_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct rvt_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct rvt_rwqe wq[0]; +}; + +struct rvt_rq { + struct rvt_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; +}; + +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +#define RVT_MAX_RDMA_ATOMIC 16 + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct rvt_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct rvt_sge rdma_sge; + u64 atomic_data; + }; +}; + +struct rvt_sge_state { + struct rvt_sge *sg_list; /* next SGE to be used if any */ + struct rvt_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. 
+ */ +struct rvt_qp { + struct ib_qp ibqp; + void *priv; /* Driver private data */ + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct rvt_qp __rcu *next; /* link list for QPN hash table */ + struct rvt_swqe *s_wq; /* send work queue */ + struct rvt_mmap_info *ip; + + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + int srate_mbps; /* s_srate (below) converted to Mbit/s */ + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + + u8 state; /* QP state */ + u8 allowed_ops; /* high order bits of allowed opcodes */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct rvt_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct rvt_sge_state r_sge; /* current receive data */ + struct rvt_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + struct rvt_sge_state *s_cur_sge; + u32 s_flags; + struct rvt_swqe *s_wqe; + struct rvt_sge_state s_sge; /* current send request data */ + struct rvt_mregion *s_rdma_mr; + struct sdma_engine *s_sde; /* current sde */ + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 
s_rdma_ack_cnt; + s8 s_ahgidx; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + struct rvt_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + + /* + * This sge list MUST be last. Do not add anything below here. + */ + struct rvt_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +/* End QP section */ + +/* * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { -- cgit v0.10.2 From aec5778775ac03ee6cfd6480adbbf6b05513d77b Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:52 -0800 Subject: IB/rdmavt: Move driver helper functions to a common structure Drivers are going to need to provide multiple functions for rdmavt to call in to. We already have one, so go ahead and push this into a data structure designated for driver supplied functions. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 367bc45..b4ce97e 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -280,8 +280,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; + /* Validate that drivers have provided the right functions */ + if (!rdi->driver_f.port_callback) + return -EINVAL; + /* We are now good to announce we exist */ - return ib_register_device(&rdi->ibdev, rdi->port_callback); + return ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); } EXPORT_SYMBOL(rvt_register_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9baa7f0..e0beedc 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -383,6 +383,19 @@ struct rvt_driver_params { */ }; +/* + * Functions that drivers are required to support + */ +struct rvt_driver_provided { + /* + * The work to create port files in /sys/class Infiniband is different + * depending on the driver. This should not be extracted away and + * instead drivers are responsible for setting the correct callback for + * this. + */ + int (*port_callback)(struct ib_device *, u8, struct kobject *); +}; + /* Protection domain */ struct rvt_pd { struct ib_pd ibpd; @@ -407,13 +420,8 @@ struct rvt_dev_info { /* PKey Table goes here */ - /* - * The work to create port files in /sys/class Infiniband is different - * depending on the driver. This should not be extracted away and - * instead drivers are responsible for setting the correct callback for - * this. - */ - int (*port_callback)(struct ib_device *, u8, struct kobject *); + /* Driver specific helper functions */ + struct rvt_driver_provided driver_f; /* Internal use */ int n_pds_allocated; -- cgit v0.10.2 From b534875d5ab348fb9193692589e2ee82ae768e3a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:59 -0800 Subject: IB/rdmavt: Add device specific info prints Follow hfi1's example for printing information about the driver and incorporate into rdmavt. This requires two new functions to be provided by the driver, one to get_card_name and one to get_pci_dev. 
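For illustration, a minimal sketch of what a driver is now expected to wire up before calling rvt_register_device(); the mydrv names, the wrapper struct, and the stub port callback are assumptions made for this example, while rvt_dev_info, driver_f, and rvt_register_device() are the pieces added by this series:

#include <linux/kernel.h>
#include <linux/pci.h>
#include <rdma/rdma_vt.h>

/* Hypothetical driver device structure wrapping rvt_dev_info. */
struct mydrv_devdata {
        struct rvt_dev_info rdi;
        struct pci_dev *pcidev;
        char card_name[16];
};

static const char *mydrv_get_card_name(struct rvt_dev_info *rdi)
{
        struct mydrv_devdata *dd = container_of(rdi, struct mydrv_devdata, rdi);

        return dd->card_name;
}

static struct pci_dev *mydrv_get_pci_dev(struct rvt_dev_info *rdi)
{
        struct mydrv_devdata *dd = container_of(rdi, struct mydrv_devdata, rdi);

        return dd->pcidev;
}

/* Stub sysfs callback; a real driver creates its per-port files here. */
static int mydrv_port_callback(struct ib_device *ibdev, u8 port_num,
                               struct kobject *kobj)
{
        return 0;
}

static int mydrv_register(struct mydrv_devdata *dd)
{
        dd->rdi.driver_f.port_callback = mydrv_port_callback;
        dd->rdi.driver_f.get_card_name = mydrv_get_card_name;
        dd->rdi.driver_f.get_pci_dev = mydrv_get_pci_dev;

        return rvt_register_device(&dd->rdi);
}

With the rvt_pr_* macros introduced here, rvt_pr_info(rdi, ...) then resolves to dev_info() on the device returned by get_pci_dev(), prefixed by the string returned by get_card_name().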
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index b4ce97e..2a13e36 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -213,9 +213,18 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, int rvt_register_device(struct rvt_dev_info *rdi) { + /* Validate that drivers have provided the right information */ if (!rdi) return -EINVAL; + if ((!rdi->driver_f.port_callback) || + (!rdi->driver_f.get_card_name) || + (!rdi->driver_f.get_pci_dev)) { + return -EINVAL; + } + + /* Once we get past here we can use the rvt_pr macros */ + /* Dev Ops */ CHECK_DRIVER_OVERRIDE(rdi, query_device); CHECK_DRIVER_OVERRIDE(rdi, modify_device); @@ -280,9 +289,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; - /* Validate that drivers have provided the right functions */ - if (!rdi->driver_f.port_callback) - return -EINVAL; + rvt_pr_info(rdi, "Registration with rdmavt done.\n"); /* We are now good to announce we exist */ return ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index fdb52a8..54ee05a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -49,6 +49,7 @@ */ #include +#include #include "dma.h" #include "pd.h" #include "qp.h" @@ -59,4 +60,31 @@ #include "mmap.h" #include "cq.h" +#define rvt_pr_info(rdi, fmt, ...) \ + __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ + rdi->driver_f.get_card_name(rdi), \ + fmt, \ + ##__VA_ARGS__) + +#define rvt_pr_warn(rdi, fmt, ...) \ + __rvt_pr_warn(rdi->driver_f.get_pci_dev(rdi), \ + rdi->driver_f.get_card_name(rdi), \ + fmt, \ + ##__VA_ARGS__) + +#define rvt_pr_err(rdi, fmt, ...) \ + __rvt_pr_err(rdi->driver_f.get_pci_dev(rdi), \ + rdi->driver_f.get_card_name(rdi), \ + fmt, \ + ##__VA_ARGS__) + +#define __rvt_pr_info(pdev, name, fmt, ...) \ + dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) + +#define __rvt_pr_warn(pdev, name, fmt, ...) \ + dev_warn(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) + +#define __rvt_pr_err(pdev, name, fmt, ...) \ + dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) + #endif /* DEF_RDMAVT_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e0beedc..4b83770 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -386,6 +386,7 @@ struct rvt_driver_params { /* * Functions that drivers are required to support */ +struct rvt_dev_info; struct rvt_driver_provided { /* * The work to create port files in /sys/class Infiniband is different @@ -394,6 +395,8 @@ struct rvt_driver_provided { * this. */ int (*port_callback)(struct ib_device *, u8, struct kobject *); + const char * (*get_card_name)(struct rvt_dev_info *rdi); + struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); }; /* Protection domain */ -- cgit v0.10.2 From 0b8a8aae02abfbd724186cffe400fbdbf0cb41d6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:07 -0800 Subject: IB/rdmavt: Add the start of capability flags Drivers will need a set of flags to dictate behavior to rdmavt. This patch adds a placeholder and a spot for it to live, as well as a few flags that will be used. 
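For illustration, a minimal sketch of how the flags are meant to be used, assuming the made-up mydrv naming from the earlier sketch; the rdmavt-side check mirrors the one rvt_driver_mr_init() adds in the memory-registration patch later in this series:

#include "vt.h"        /* rdmavt-internal header: rdma_vt.h plus rvt_pr_info() */

/* Driver side: keep MR and QP initialization in the driver for now. */
static void mydrv_set_rvt_flags(struct rvt_dev_info *rdi)
{
        rdi->flags = RVT_FLAG_MR_INIT_DRIVER | RVT_FLAG_QP_INIT_DRIVER;
}

/* rdmavt side: an init helper backs off when the driver claims the object. */
static int rvt_example_mr_init(struct rvt_dev_info *rdi)
{
        if (rdi->flags & RVT_FLAG_MR_INIT_DRIVER) {
                rvt_pr_info(rdi, "Driver is doing MR init.\n");
                return 0;
        }
        /* ...otherwise rdmavt performs its own MR setup... */
        return 0;
}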
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4b83770..b44ac17 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -56,6 +56,16 @@ #include "ib_verbs.h" /* + * For some of the IBTA objects there will likely be some + * initializations required. We need flags to determine whether it is OK + * for rdmavt to do this or not. This does not imply any functions of a + * partiuclar IBTA object are overridden. + */ +#define RVT_FLAG_MR_INIT_DRIVER BIT(1) +#define RVT_FLAG_QP_INIT_DRIVER BIT(2) +#define RVT_FLAG_CQ_INIT_DRIVER BIT(3) + +/* * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once * drivers no longer need access to the MR directly. */ @@ -429,6 +439,8 @@ struct rvt_dev_info { /* Internal use */ int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ + + int flags; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) -- cgit v0.10.2 From 7b1e2099adc8e66f78fee2dd2f10cb8a11362083 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:31 -0800 Subject: IB/rdmavt: Move memory registration into rdmavt Use the memory registration routines in hfi1 and move them to rdmavt. A follow on patch will address removing the duplicated code in the hfi1 and qib drivers. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index c672043..f1dcaf4 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -46,8 +46,252 @@ */ #include +#include +#include +#include +#include "vt.h" #include "mr.h" +/* + * Do any intilization needed when a driver registers with rdmavt. + */ +int rvt_driver_mr_init(struct rvt_dev_info *rdi) +{ + unsigned int lkey_table_size = rdi->dparms.lkey_table_size; + unsigned lk_tab_size; + int i; + + if (rdi->flags & RVT_FLAG_MR_INIT_DRIVER) { + rvt_pr_info(rdi, "Driver is doing MR init.\n"); + return 0; + } + + /* + * The top hfi1_lkey_table_size bits are used to index the + * table. The lower 8 bits can be owned by the user (copied from + * the LKEY). The remaining bits act as a generation number or tag. 
+ */ + if (!lkey_table_size) + return -EINVAL; + + spin_lock_init(&rdi->lkey_table.lock); + + rdi->lkey_table.max = 1 << lkey_table_size; + + /* ensure generation is at least 4 bits */ + if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) { + rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n", + lkey_table_size, RVT_MAX_LKEY_TABLE_BITS); + rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS; + lkey_table_size = rdi->dparms.lkey_table_size; + } + lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); + rdi->lkey_table.table = (struct rvt_mregion __rcu **) + vmalloc(lk_tab_size); + if (!rdi->lkey_table.table) + return -ENOMEM; + + RCU_INIT_POINTER(rdi->dma_mr, NULL); + for (i = 0; i < rdi->lkey_table.max; i++) + RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); + + return 0; +} + +/* + * called when drivers have unregistered or perhaps failed to register with us + */ +void rvt_mr_exit(struct rvt_dev_info *rdi) +{ + if (rdi->dma_mr) + rvt_pr_err(rdi, "DMA MR not null!\n"); + + vfree(rdi->lkey_table.table); +} + +static void rvt_deinit_mregion(struct rvt_mregion *mr) +{ + int i = mr->mapsz; + + mr->mapsz = 0; + while (i) + kfree(mr->map[--i]); +} + +static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, + int count) +{ + int m, i = 0; + + mr->mapsz = 0; + m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; + for (; i < m; i++) { + mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); + if (!mr->map[i]) { + rvt_deinit_mregion(mr); + return -ENOMEM; + } + mr->mapsz++; + } + init_completion(&mr->comp); + /* count returning the ptr to user */ + atomic_set(&mr->refcount, 1); + mr->pd = pd; + mr->max_segs = count; + return 0; +} + +/** + * rvt_alloc_lkey - allocate an lkey + * @mr: memory region that this lkey protects + * @dma_region: 0->normal key, 1->restricted DMA key + * + * Returns 0 if successful, otherwise returns -errno. + * + * Increments mr reference count as required. + * + * Sets the lkey field mr for non-dma regions. + * + */ +static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region) +{ + unsigned long flags; + u32 r; + u32 n; + int ret = 0; + struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device); + struct rvt_lkey_table *rkt = &dev->lkey_table; + + rvt_get_mr(mr); + spin_lock_irqsave(&rkt->lock, flags); + + /* special case for dma_mr lkey == 0 */ + if (dma_region) { + struct rvt_mregion *tmr; + + tmr = rcu_access_pointer(dev->dma_mr); + if (!tmr) { + rcu_assign_pointer(dev->dma_mr, mr); + mr->lkey_published = 1; + } else { + rvt_put_mr(mr); + } + goto success; + } + + /* Find the next available LKEY */ + r = rkt->next; + n = r; + for (;;) { + if (!rcu_access_pointer(rkt->table[r])) + break; + r = (r + 1) & (rkt->max - 1); + if (r == n) + goto bail; + } + rkt->next = (r + 1) & (rkt->max - 1); + /* + * Make sure lkey is never zero which is reserved to indicate an + * unrestricted LKEY. 
+ */ + rkt->gen++; + /* + * bits are capped to ensure enough bits for generation number + */ + mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) | + ((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen) + << 8); + if (mr->lkey == 0) { + mr->lkey |= 1 << 8; + rkt->gen++; + } + rcu_assign_pointer(rkt->table[r], mr); + mr->lkey_published = 1; +success: + spin_unlock_irqrestore(&rkt->lock, flags); +out: + return ret; +bail: + rvt_put_mr(mr); + spin_unlock_irqrestore(&rkt->lock, flags); + ret = -ENOMEM; + goto out; +} + +/** + * rvt_free_lkey - free an lkey + * @mr: mr to free from tables + */ +static void rvt_free_lkey(struct rvt_mregion *mr) +{ + unsigned long flags; + u32 lkey = mr->lkey; + u32 r; + struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device); + struct rvt_lkey_table *rkt = &dev->lkey_table; + int freed = 0; + + spin_lock_irqsave(&rkt->lock, flags); + if (!mr->lkey_published) + goto out; + if (lkey == 0) { + RCU_INIT_POINTER(dev->dma_mr, NULL); + } else { + r = lkey >> (32 - dev->dparms.lkey_table_size); + RCU_INIT_POINTER(rkt->table[r], NULL); + } + mr->lkey_published = 0; + freed++; +out: + spin_unlock_irqrestore(&rkt->lock, flags); + if (freed) { + synchronize_rcu(); + rvt_put_mr(mr); + } +} + +static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd) +{ + struct rvt_mr *mr; + int rval = -ENOMEM; + int m; + + /* Allocate struct plus pointers to first level page tables. */ + m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; + mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL); + if (!mr) + goto bail; + + rval = rvt_init_mregion(&mr->mr, pd, count); + if (rval) + goto bail; + /* + * ib_reg_phys_mr() will initialize mr->ibmr except for + * lkey and rkey. + */ + rval = rvt_alloc_lkey(&mr->mr, 0); + if (rval) + goto bail_mregion; + mr->ibmr.lkey = mr->mr.lkey; + mr->ibmr.rkey = mr->mr.lkey; +done: + return mr; + +bail_mregion: + rvt_deinit_mregion(&mr->mr); +bail: + kfree(mr); + mr = ERR_PTR(rval); + goto done; +} + +static void __rvt_free_mr(struct rvt_mr *mr) +{ + rvt_deinit_mregion(&mr->mr); + rvt_free_lkey(&mr->mr); + vfree(mr); +} + /** * rvt_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region @@ -59,11 +303,41 @@ */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { - /* - * Alloc mr and init it. - * Alloc lkey. 
- */ - return ERR_PTR(-EOPNOTSUPP); + struct rvt_mr *mr; + struct ib_mr *ret; + int rval; + + if (ibpd_to_rvtpd(pd)->user) + return ERR_PTR(-EPERM); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + rval = rvt_init_mregion(&mr->mr, pd, 0); + if (rval) { + ret = ERR_PTR(rval); + goto bail; + } + + rval = rvt_alloc_lkey(&mr->mr, 1); + if (rval) { + ret = ERR_PTR(rval); + goto bail_mregion; + } + + mr->mr.access_flags = acc; + ret = &mr->ibmr; +done: + return ret; + +bail_mregion: + rvt_deinit_mregion(&mr->mr); +bail: + kfree(mr); + goto done; } /** @@ -80,7 +354,64 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_mr *mr; + struct ib_umem *umem; + struct scatterlist *sg; + int n, m, entry; + struct ib_mr *ret; + + if (length == 0) + return ERR_PTR(-EINVAL); + + umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(umem)) + return (void *)umem; + + n = umem->nmap; + + mr = __rvt_alloc_mr(n, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; + goto bail_umem; + } + + mr->mr.user_base = start; + mr->mr.iova = virt_addr; + mr->mr.length = length; + mr->mr.offset = ib_umem_offset(umem); + mr->mr.access_flags = mr_access_flags; + mr->umem = umem; + + if (is_power_of_2(umem->page_size)) + mr->mr.page_shift = ilog2(umem->page_size); + m = 0; + n = 0; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + void *vaddr; + + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail_inval; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == RVT_SEGSZ) { + m++; + n = 0; + } + } + return &mr->ibmr; + +bail_inval: + __rvt_free_mr(mr); + +bail_umem: + ib_umem_release(umem); + + return ret; } /** @@ -94,7 +425,29 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, */ int rvt_dereg_mr(struct ib_mr *ibmr) { - return -EOPNOTSUPP; + struct rvt_mr *mr = to_imr(ibmr); + struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device); + int ret = 0; + unsigned long timeout; + + rvt_free_lkey(&mr->mr); + + rvt_put_mr(&mr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ); + if (!timeout) { + rvt_pr_err(rdi, + "rvt_dereg_mr timeout mr %p pd %p refcount %u\n", + mr, mr->mr.pd, atomic_read(&mr->mr.refcount)); + rvt_get_mr(&mr->mr); + ret = -EBUSY; + goto out; + } + rvt_deinit_mregion(&mr->mr); + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); +out: + return ret; } /** @@ -109,7 +462,16 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_mr *mr; + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = __rvt_alloc_mr(max_num_sg, pd); + if (IS_ERR(mr)) + return (struct ib_mr *)mr; + + return &mr->ibmr; } /** @@ -123,7 +485,48 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_fmr *fmr; + int m; + struct ib_fmr *ret; + int rval = -ENOMEM; + + /* Allocate struct plus pointers to first level page tables. 
*/ + m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ; + fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL); + if (!fmr) + goto bail; + + rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages); + if (rval) + goto bail; + + /* + * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & + * rkey. + */ + rval = rvt_alloc_lkey(&fmr->mr, 0); + if (rval) + goto bail_mregion; + fmr->ibfmr.rkey = fmr->mr.lkey; + fmr->ibfmr.lkey = fmr->mr.lkey; + /* + * Resources are allocated but no valid mapping (RKEY can't be + * used). + */ + fmr->mr.access_flags = mr_access_flags; + fmr->mr.max_segs = fmr_attr->max_pages; + fmr->mr.page_shift = fmr_attr->page_shift; + + ret = &fmr->ibfmr; +done: + return ret; + +bail_mregion: + rvt_deinit_mregion(&fmr->mr); +bail: + kfree(fmr); + ret = ERR_PTR(rval); + goto done; } /** @@ -139,7 +542,38 @@ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int list_len, u64 iova) { - return -EOPNOTSUPP; + struct rvt_fmr *fmr = to_ifmr(ibfmr); + struct rvt_lkey_table *rkt; + unsigned long flags; + int m, n, i; + u32 ps; + struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device); + + i = atomic_read(&fmr->mr.refcount); + if (i > 2) + return -EBUSY; + + if (list_len > fmr->mr.max_segs) + return -EINVAL; + + rkt = &rdi->lkey_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = iova; + fmr->mr.iova = iova; + ps = 1 << fmr->mr.page_shift; + fmr->mr.length = list_len * ps; + m = 0; + n = 0; + for (i = 0; i < list_len; i++) { + fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; + fmr->mr.map[m]->segs[n].length = ps; + if (++n == RVT_SEGSZ) { + m++; + n = 0; + } + } + spin_unlock_irqrestore(&rkt->lock, flags); + return 0; } /** @@ -150,7 +584,21 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, */ int rvt_unmap_fmr(struct list_head *fmr_list) { - return -EOPNOTSUPP; + struct rvt_fmr *fmr; + struct rvt_lkey_table *rkt; + unsigned long flags; + struct rvt_dev_info *rdi; + + list_for_each_entry(fmr, fmr_list, ibfmr.list) { + rdi = ib_to_rvt(fmr->ibfmr.device); + rkt = &rdi->lkey_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = 0; + fmr->mr.iova = 0; + fmr->mr.length = 0; + spin_unlock_irqrestore(&rkt->lock, flags); + } + return 0; } /** @@ -161,5 +609,216 @@ int rvt_unmap_fmr(struct list_head *fmr_list) */ int rvt_dealloc_fmr(struct ib_fmr *ibfmr) { - return -EOPNOTSUPP; + struct rvt_fmr *fmr = to_ifmr(ibfmr); + int ret = 0; + unsigned long timeout; + + rvt_free_lkey(&fmr->mr); + rvt_put_mr(&fmr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ); + if (!timeout) { + rvt_get_mr(&fmr->mr); + ret = -EBUSY; + goto out; + } + rvt_deinit_mregion(&fmr->mr); + kfree(fmr); +out: + return ret; +} + +/** + * rvt_lkey_ok - check IB SGE for validity and initialize + * @rkt: table containing lkey to check SGE against + * @pd: protection domain + * @isge: outgoing internal SGE + * @sge: SGE to check + * @acc: access flags + * + * Return 1 if valid and successful, otherwise returns 0. + * + * increments the reference count upon success + * + * Check the IB SGE for validity and initialize our internal version + * of it. 
+ */ +int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, + struct rvt_sge *isge, struct ib_sge *sge, int acc) +{ + struct rvt_mregion *mr; + unsigned n, m; + size_t off; + struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); + + /* + * We use LKEY == zero for kernel virtual addresses + * (see rvt_get_dma_mr and dma.c). + */ + rcu_read_lock(); + if (sge->lkey == 0) { + if (pd->user) + goto bail; + mr = rcu_dereference(dev->dma_mr); + if (!mr) + goto bail; + atomic_inc(&mr->refcount); + rcu_read_unlock(); + + isge->mr = mr; + isge->vaddr = (void *)sge->addr; + isge->length = sge->length; + isge->sge_length = sge->length; + isge->m = 0; + isge->n = 0; + goto ok; + } + mr = rcu_dereference( + rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); + if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) + goto bail; + + off = sge->addr - mr->user_base; + if (unlikely(sge->addr < mr->user_base || + off + sge->length > mr->length || + (mr->access_flags & acc) != acc)) + goto bail; + atomic_inc(&mr->refcount); + rcu_read_unlock(); + + off += mr->offset; + if (mr->page_shift) { + /* + * page sizes are uniform power of 2 so no loop is necessary + * entries_spanned_by_off is the number of times the loop below + * would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off / RVT_SEGSZ; + n = entries_spanned_by_off % RVT_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= RVT_SEGSZ) { + m++; + n = 0; + } + } + } + isge->mr = mr; + isge->vaddr = mr->map[m]->segs[n].vaddr + off; + isge->length = mr->map[m]->segs[n].length - off; + isge->sge_length = sge->length; + isge->m = m; + isge->n = n; +ok: + return 1; +bail: + rcu_read_unlock(); + return 0; +} +EXPORT_SYMBOL(rvt_lkey_ok); + +/** + * rvt_rkey_ok - check the IB virtual address, length, and RKEY + * @qp: qp for validation + * @sge: SGE state + * @len: length of data + * @vaddr: virtual address to place data + * @rkey: rkey to check + * @acc: access flags + * + * Return 1 if successful, otherwise 0. + * + * increments the reference count upon success + */ +int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc) +{ + struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device); + struct rvt_lkey_table *rkt = &dev->lkey_table; + struct rvt_mregion *mr; + unsigned n, m; + size_t off; + + /* + * We use RKEY == zero for kernel virtual addresses + * (see rvt_get_dma_mr and dma.c). 
+ */ + rcu_read_lock(); + if (rkey == 0) { + struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd); + struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device); + + if (pd->user) + goto bail; + mr = rcu_dereference(rdi->dma_mr); + if (!mr) + goto bail; + atomic_inc(&mr->refcount); + rcu_read_unlock(); + + sge->mr = mr; + sge->vaddr = (void *)vaddr; + sge->length = len; + sge->sge_length = len; + sge->m = 0; + sge->n = 0; + goto ok; + } + + mr = rcu_dereference( + rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + goto bail; + + off = vaddr - mr->iova; + if (unlikely(vaddr < mr->iova || off + len > mr->length || + (mr->access_flags & acc) == 0)) + goto bail; + atomic_inc(&mr->refcount); + rcu_read_unlock(); + + off += mr->offset; + if (mr->page_shift) { + /* + * page sizes are uniform power of 2 so no loop is necessary + * entries_spanned_by_off is the number of times the loop below + * would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off / RVT_SEGSZ; + n = entries_spanned_by_off % RVT_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= RVT_SEGSZ) { + m++; + n = 0; + } + } + } + sge->mr = mr; + sge->vaddr = mr->map[m]->segs[n].vaddr + off; + sge->length = mr->map[m]->segs[n].length - off; + sge->sge_length = len; + sge->m = m; + sge->n = n; +ok: + return 1; +bail: + rcu_read_unlock(); + return 0; } +EXPORT_SYMBOL(rvt_rkey_ok); diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index f19e9da..c5339aa 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -49,6 +49,29 @@ */ #include +struct rvt_fmr { + struct ib_fmr ibfmr; + struct rvt_mregion mr; /* must be last */ +}; + +struct rvt_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct rvt_mregion mr; /* must be last */ +}; + +static inline struct rvt_fmr *to_ifmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct rvt_fmr, ibfmr); +} + +static inline struct rvt_mr *to_imr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct rvt_mr, ibmr); +} + +int rvt_driver_mr_init(struct rvt_dev_info *rdi); +void rvt_mr_exit(struct rvt_dev_info *rdi); /* Mem Regions */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 2a13e36..516c810 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -214,6 +214,8 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, int rvt_register_device(struct rvt_dev_info *rdi) { /* Validate that drivers have provided the right information */ + int ret = 0; + if (!rdi) return -EINVAL; @@ -262,6 +264,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, detach_mcast); /* Mem Region */ + ret = rvt_driver_mr_init(rdi); + if (ret) { + rvt_pr_err(rdi, "Error in driver MR init.\n"); + goto bail_no_mr; + } + CHECK_DRIVER_OVERRIDE(rdi, get_dma_mr); CHECK_DRIVER_OVERRIDE(rdi, reg_user_mr); CHECK_DRIVER_OVERRIDE(rdi, dereg_mr); @@ -289,10 +297,21 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; + /* We are now good to announce we exist */ + ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); + if (ret) { + 
rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); + goto bail_mr; + } + rvt_pr_info(rdi, "Registration with rdmavt done.\n"); + return ret; - /* We are now good to announce we exist */ - return ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); +bail_mr: + rvt_mr_exit(rdi); + +bail_no_mr: + return ret; } EXPORT_SYMBOL(rvt_register_device); @@ -302,5 +321,6 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) return; ib_unregister_device(&rdi->ibdev); + rvt_mr_exit(rdi); } EXPORT_SYMBOL(rvt_unregister_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index b44ac17..9a47957 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -391,6 +391,7 @@ struct rvt_driver_params { * Anything driver specific that is not covered by props * For instance special module parameters. Goes here. */ + unsigned int lkey_table_size; }; /* @@ -416,6 +417,8 @@ struct rvt_pd { }; struct rvt_dev_info { + struct ib_device ibdev; /* Keep this first. Nothing above here */ + /* * Prior to calling for registration the driver will be responsible for * allocating space for this structure. @@ -423,7 +426,6 @@ struct rvt_dev_info { * The driver will also be responsible for filling in certain members of * dparms.props */ - struct ib_device ibdev; /* Driver specific properties */ struct rvt_driver_params dparms; @@ -453,7 +455,22 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } +static inline void rvt_put_mr(struct rvt_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + complete(&mr->comp); +} + +static inline void rvt_get_mr(struct rvt_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc); +int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, + struct rvt_sge *isge, struct ib_sge *sge, int acc); #endif /* DEF_RDMA_VT_H */ -- cgit v0.10.2 From 36055a0652b6acb0e54ca0ad5ead2ac51e90f9ab Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:39 -0800 Subject: IB/rdmavt: Do not use rvt prints which rely on driver too early Trying to print debug and error messages with the rdmavt helpers will not work out so well if the drivers have not provided the get_card and get pci functions. Use the normal pr_error instead until we can check this. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 516c810..e92af9c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -266,7 +266,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Mem Region */ ret = rvt_driver_mr_init(rdi); if (ret) { - rvt_pr_err(rdi, "Error in driver MR init.\n"); + pr_err("Error in driver MR init.\n"); goto bail_no_mr; } -- cgit v0.10.2 From f2f342115ef2b0755abd73573831351e371f6242 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 6 Jan 2016 10:03:47 -0800 Subject: IB/rdmavt: Add common LID defines to rdmavt Original patch is from Kamal Heib . It has been split into separate patches. This patch adds RVT_PERMISSIVE_LID and RVT_MULTICAST_LID_BASE to rdmavt. 
Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9a47957..dbb45bcd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,9 @@ #include "ib_verbs.h" +#define RVT_MULTICAST_LID_BASE 0xC000 +#define RVT_PERMISSIVE_LID 0xFFFF + /* * For some of the IBTA objects there will likely be some * initializations required. We need flags to determine whether it is OK -- cgit v0.10.2 From 119a8e708d16d38eedfa3d920b89b709dda41a8f Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 6 Jan 2016 10:03:59 -0800 Subject: IB/rdmavt: Add AH to rdmavt Original patch is from Kamal Heib . It has been split into three separate patches. This one for rdmavt, a follow on for qib, and one for hfi1. Create datastructure for address handle and implement the create/destroy/modify/query of address handle for rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index d368955..2519db9 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -45,7 +45,49 @@ * */ +#include #include "ah.h" +#include "vt.h" /* for prints */ + +/** + * rvt_check_ah - validate the attributes of AH + * @ibdev: the ib device + * @ah_attr: the attributes of the AH + */ +int rvt_check_ah(struct ib_device *ibdev, + struct ib_ah_attr *ah_attr) +{ + int err; + struct ib_port_attr port_attr; + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, + ah_attr->port_num); + + err = ib_query_port(ibdev, ah_attr->port_num, &port_attr); + if (err) + return -EINVAL; + if (ah_attr->port_num < 1 || + ah_attr->port_num > ibdev->phys_port_cnt) + return -EINVAL; + if (ah_attr->static_rate != IB_RATE_PORT_CURRENT && + ib_rate_to_mbps(ah_attr->static_rate) < 0) + return -EINVAL; + if ((ah_attr->ah_flags & IB_AH_GRH) && + ah_attr->grh.sgid_index >= port_attr.gid_tbl_len) + return -EINVAL; + if (link != IB_LINK_LAYER_ETHERNET) { + if (ah_attr->dlid == 0) + return -EINVAL; + if (ah_attr->dlid >= RVT_MULTICAST_LID_BASE && + ah_attr->dlid != RVT_PERMISSIVE_LID && + !(ah_attr->ah_flags & IB_AH_GRH)) + return -EINVAL; + } + if (rdi->driver_f.check_ah(ibdev, ah_attr)) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL(rvt_check_ah); /** * rvt_create_ah - create an address handle @@ -57,20 +99,68 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_ah *ah; + struct rvt_dev_info *dev = ib_to_rvt(pd->device); + unsigned long flags; + + if (rvt_check_ah(pd->device, ah_attr)) + return ERR_PTR(-EINVAL); + + ah = kmalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + if (dev->n_ahs_allocated == dev->dparms.props.max_ah) { + spin_unlock(&dev->n_ahs_lock); + kfree(ah); + return ERR_PTR(-ENOMEM); + } + + dev->n_ahs_allocated++; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + ah->attr = *ah_attr; + atomic_set(&ah->refcount, 0); + + return &ah->ibah; } int rvt_destroy_ah(struct ib_ah *ibah) { - return -EOPNOTSUPP; + struct rvt_dev_info *dev = ib_to_rvt(ibah->device); + struct rvt_ah *ah = ibah_to_rvtah(ibah); + unsigned long flags; + + if (atomic_read(&ah->refcount) != 0) + return -EBUSY; + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + dev->n_ahs_allocated--; + 
spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + kfree(ah); + + return 0; } int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { - return -EOPNOTSUPP; + struct rvt_ah *ah = ibah_to_rvtah(ibah); + + if (rvt_check_ah(ibah->device, ah_attr)) + return -EINVAL; + + ah->attr = *ah_attr; + + return 0; } int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { - return -EOPNOTSUPP; + struct rvt_ah *ah = ibah_to_rvtah(ibah); + + *ah_attr = ah->attr; + + return 0; } diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e92af9c..7dab0ca 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -221,7 +221,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) if ((!rdi->driver_f.port_callback) || (!rdi->driver_f.get_card_name) || - (!rdi->driver_f.get_pci_dev)) { + (!rdi->driver_f.get_pci_dev) || + (!rdi->driver_f.check_ah)) { return -EINVAL; } @@ -252,6 +253,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, destroy_ah); CHECK_DRIVER_OVERRIDE(rdi, modify_ah); CHECK_DRIVER_OVERRIDE(rdi, query_ah); + spin_lock_init(&rdi->n_ahs_lock); + rdi->n_ahs_allocated = 0; /* Shared Receive Queue */ CHECK_DRIVER_OVERRIDE(rdi, create_srq); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dbb45bcd..36cced6 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -411,6 +411,7 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); }; /* Protection domain */ @@ -419,6 +420,13 @@ struct rvt_pd { int user; /* non-zero if created from user space */ }; +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; +}; + struct rvt_dev_info { struct ib_device ibdev; /* Keep this first. Nothing above here */ @@ -445,6 +453,9 @@ struct rvt_dev_info { int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ + int n_ahs_allocated; + spinlock_t n_ahs_lock; /* Protect ah allocated count */ + int flags; }; @@ -453,6 +464,11 @@ static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) return container_of(ibpd, struct rvt_pd, ibpd); } +static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah) +{ + return container_of(ibah, struct rvt_ah, ibah); +} + static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) { return container_of(ibdev, struct rvt_dev_info, ibdev); @@ -471,6 +487,7 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v0.10.2 From 70a1a351626073123ab79de24119977c4a297fdf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:06 -0800 Subject: IB/rdmavt: Move SRQ data structure into rdmavt Patch moves the srq data structure into rdmavt in preparation for removal from qib and hfi1 which will follow in subsequent patches. 
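For illustration, a hedged sketch of the accessor pattern this structure enables; the query callback below is illustrative and not part of this patch:

static int example_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	/*
	 * ibsrq is embedded in rvt_srq, so the ibsrq_to_rvtsrq() wrapper
	 * around container_of() recovers the rdmavt object.
	 */
	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);

	attr->max_wr = srq->rq.size - 1;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}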
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 36cced6..fcf3ec0 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -349,6 +349,14 @@ struct rvt_qp { ____cacheline_aligned_in_smp; }; +struct rvt_srq { + struct ib_srq ibsrq; + struct rvt_rq rq; + struct rvt_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + /* End QP section */ /* @@ -485,6 +493,11 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) atomic_inc(&mr->refcount); } +static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct rvt_srq, ibsrq); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -- cgit v0.10.2 From f3d01bbcdc47a728336008a9254732c1652aeddd Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:13 -0800 Subject: IB/rdmavt: Add an ibport data structure to rdmavt Converge the ibport data structures of qib and hfi1 into a common ib port structure. Also provides a place to keep track of these ports in case rdmavt needs it. Along with this goes an attach and detach function for drivers to use to notify rdmavt of the ports. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7dab0ca..44de280 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -300,6 +300,19 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; + if (rdi->dparms.nports) { + rdi->ports = kcalloc(rdi->dparms.nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) { + rvt_pr_err(rdi, "Could not allocate port mem.\n"); + ret = -ENOMEM; + goto bail_mr; + } + } else { + rvt_pr_warn(rdi, "Driver says it has no ports.\n"); + } + /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { @@ -327,3 +340,14 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) rvt_mr_exit(rdi); } EXPORT_SYMBOL(rvt_unregister_device); + +/* + * Keep track of a list of ports. No need to have a detach port. + * They persist until the driver goes away. + */ +void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum) +{ + rdi->ports[portnum] = port; +} +EXPORT_SYMBOL(rvt_attach_port); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fcf3ec0..a3d6a5b 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -53,6 +53,8 @@ * rdmavt layer. 
*/ +#include +#include #include "ib_verbs.h" #define RVT_MULTICAST_LID_BASE 0xC000 @@ -359,6 +361,65 @@ struct rvt_srq { /* End QP section */ +struct rvt_ibport { + struct rvt_qp __rcu *qp[2]; + struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct rb_root mcast_tree; + spinlock_t lock; /* protect changes in this struct */ + + /* non-zero when timer is set */ + unsigned long mkey_lease_timeout; + unsigned long trap_timeout; + __be64 gid_prefix; /* in network order */ + __be64 mkey; + u64 tid; + u32 port_cap_flags; + u32 pma_sample_start; + u32 pma_sample_interval; + __be16 pma_counter_select[5]; + u16 pma_tag; + u16 mkey_lease_period; + u16 sm_lid; + u8 sm_sl; + u8 mkeyprot; + u8 subnet_timeout; + u8 vl_high_limit; + + /* + * Driver is expected to keep these up to date. These + * counters are informational only and not required to be + * completely accurate. + */ + u64 n_rc_resends; + u64 n_seq_naks; + u64 n_rdma_seq; + u64 n_rnr_naks; + u64 n_other_naks; + u64 n_loop_pkts; + u64 n_pkt_drops; + u64 n_vl15_dropped; + u64 n_rc_timeouts; + u64 n_dmawait; + u64 n_unaligned; + u64 n_rc_dupreq; + u64 n_rc_seqnak; + u16 pkey_violations; + u16 qkey_violations; + u16 mkey_violations; + + /* Hot-path per CPU counters to avoid cacheline trading to update */ + u64 z_rc_acks; + u64 z_rc_qacks; + u64 z_rc_delayed_comp; + u64 __percpu *rc_acks; + u64 __percpu *rc_qacks; + u64 __percpu *rc_delayed_comp; + + void *priv; /* driver private data */ + + /* TODO: Move sm_ah and smi_ah into here as well*/ +}; + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -403,6 +464,7 @@ struct rvt_driver_params { * For instance special module parameters. Goes here. */ unsigned int lkey_table_size; + int nports; }; /* @@ -465,6 +527,7 @@ struct rvt_dev_info { spinlock_t n_ahs_lock; /* Protect ah allocated count */ int flags; + struct rvt_ibport **ports; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -501,9 +564,10 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); - #endif /* DEF_RDMA_VT_H */ -- cgit v0.10.2 From b036db83c0ec8d1e81df19410a494be4cfe0b186 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:23 -0800 Subject: IB/rdmavt: Add driver notification for new AH Drivers may need to do some work once an address handle has been created. Add a driver function for this purpose. 
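For illustration, a hedged sketch of a driver-side notify_new_ah() implementation; the SL-to-VL and path-MTU helpers are hypothetical, and the vl/log_pmtu fields are the ones added to struct rvt_ah later in this series:

static void example_notify_new_ah(struct ib_device *ibdev,
				  struct ib_ah_attr *ah_attr,
				  struct rvt_ah *ah)
{
	/*
	 * Illustrative only: cache per-path attributes at AH creation time
	 * so the data path does not have to recompute them per packet.
	 */
	ah->vl = example_sl_to_vl(ibdev, ah_attr->port_num, ah_attr->sl);
	ah->log_pmtu = ilog2(example_path_mtu(ibdev, ah_attr));
}

rvt_create_ah() invokes the hook, when the driver provides one, after the AH has been initialized but before it is returned to the caller.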
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 2519db9..621afc3 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -83,8 +83,8 @@ int rvt_check_ah(struct ib_device *ibdev, !(ah_attr->ah_flags & IB_AH_GRH)) return -EINVAL; } - if (rdi->driver_f.check_ah(ibdev, ah_attr)) - return -EINVAL; + if (rdi->driver_f.check_ah) + return rdi->driver_f.check_ah(ibdev, ah_attr); return 0; } EXPORT_SYMBOL(rvt_check_ah); @@ -123,6 +123,9 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, ah->attr = *ah_attr; atomic_set(&ah->refcount, 0); + if (dev->driver_f.notify_new_ah) + dev->driver_f.notify_new_ah(pd->device, ah_attr, ah); + return &ah->ibah; } diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index a3d6a5b..ef66d2b 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -467,9 +467,21 @@ struct rvt_driver_params { int nports; }; -/* - * Functions that drivers are required to support - */ +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; + u8 vl; + u8 log_pmtu; +}; + struct rvt_dev_info; struct rvt_driver_provided { /* @@ -478,23 +490,20 @@ struct rvt_driver_provided { * instead drivers are responsible for setting the correct callback for * this. */ + + /* -------------------*/ + /* Required functions */ + /* -------------------*/ int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - int (*check_ah)(struct ib_device *, struct ib_ah_attr *); -}; -/* Protection domain */ -struct rvt_pd { - struct ib_pd ibpd; - int user; /* non-zero if created from user space */ -}; - -/* Address handle */ -struct rvt_ah { - struct ib_ah ibah; - struct ib_ah_attr attr; - atomic_t refcount; + /*--------------------*/ + /* Optional functions */ + /*--------------------*/ + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, + struct rvt_ah *); }; struct rvt_dev_info { -- cgit v0.10.2 From b4e64397dabc946b83ffb1defa1215ede84c3b97 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:31 -0800 Subject: IB/rdmavt: Break rdma_vt main include header file up Until all functionality is moved over to rdmavt drivers still need to access a number of fields in data structures that are predominantly meant to be used by rdmavt. Once these rdmavt_.h header files are no longer being touched by drivers their content should be moved to rdmavt/.h. While here move a couple #defines over to more general IB verbs header files because they fit better. 
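For drivers the practical change is small: they keep including <rdma/rdma_vt.h>, which now pulls in rdmavt_mr.h and rdmavt_qp.h, and LID comparisons switch to the generic verbs defines. A hedged sketch of the resulting check, matching the ah.c hunk below:

	if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
	    ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
	    !(ah_attr->ah_flags & IB_AH_GRH))
		return -EINVAL;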
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 621afc3..c194d9d 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -78,8 +78,8 @@ int rvt_check_ah(struct ib_device *ibdev, if (link != IB_LINK_LAYER_ETHERNET) { if (ah_attr->dlid == 0) return -EINVAL; - if (ah_attr->dlid >= RVT_MULTICAST_LID_BASE && - ah_attr->dlid != RVT_PERMISSIVE_LID && + if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) && + ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) && !(ah_attr->ah_flags & IB_AH_GRH)) return -EINVAL; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c..d7d531c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -613,6 +613,7 @@ enum { }; #define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) +#define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000) enum ib_ah_flags { IB_AH_GRH = 1 diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index ef66d2b..79da8ee 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,10 +55,9 @@ #include #include -#include "ib_verbs.h" - -#define RVT_MULTICAST_LID_BASE 0xC000 -#define RVT_PERMISSIVE_LID 0xFFFF +#include +#include +#include /* * For some of the IBTA objects there will likely be some @@ -70,297 +69,6 @@ #define RVT_FLAG_QP_INIT_DRIVER BIT(2) #define RVT_FLAG_CQ_INIT_DRIVER BIT(3) -/* - * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once - * drivers no longer need access to the MR directly. - */ - -/* - * A segment is a linear region of low physical memory. - * Used by the verbs layer. - */ -struct rvt_seg { - void *vaddr; - size_t length; -}; - -/* The number of rvt_segs that fit in a page. */ -#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) - -struct rvt_segarray { - struct rvt_seg segs[RVT_SEGSZ]; -}; - -struct rvt_mregion { - struct ib_pd *pd; /* shares refcnt of ibmr.pd */ - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of rvt_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - u8 page_shift; /* 0 - non unform/non powerof2 sizes */ - u8 lkey_published; /* in global table */ - struct completion comp; /* complete when refcount goes to zero */ - atomic_t refcount; - struct rvt_segarray *map[0]; /* the segments */ -}; - -#define RVT_MAX_LKEY_TABLE_BITS 23 - -struct rvt_lkey_table { - spinlock_t lock; /* protect changes in this struct */ - u32 next; /* next unused index (speeds search) */ - u32 gen; /* generation count */ - u32 max; /* size of the table */ - struct rvt_mregion __rcu **table; -}; - -/* End Memmory Region */ - -/* - * Things needed for the Queue Pair definition. Like the MR stuff above the - * following should probably get moved to qp.h once drivers stop trying to make - * and manipulate thier own QPs. For the few instnaces where a driver may need - * to look into a queue pair there should be a pointer to a driver priavte data - * structure that they can look at. - */ - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. 
- */ -struct rvt_sge { - struct rvt_mregion *mr; - void *vaddr; /* kernel virtual address of segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -/* - * Send work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->s_max_sge. - */ -struct rvt_swqe { - union { - struct ib_send_wr wr; /* don't use wr.sg_list */ - struct ib_ud_wr ud_wr; - struct ib_reg_wr reg_wr; - struct ib_rdma_wr rdma_wr; - struct ib_atomic_wr atomic_wr; - }; - u32 psn; /* first packet sequence number */ - u32 lpsn; /* last packet sequence number */ - u32 ssn; /* send sequence number */ - u32 length; /* total length of data in sg_list */ - struct rvt_sge sg_list[0]; -}; - -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). - */ -struct rvt_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct rvt_rwq { - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ - struct rvt_rwqe wq[0]; -}; - -struct rvt_rq { - struct rvt_rwq *wq; - u32 size; /* size of RWQE array */ - u8 max_sge; - /* protect changes in this struct */ - spinlock_t lock ____cacheline_aligned_in_smp; -}; - -/* - * This structure is used by rvt_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct rvt_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -#define RVT_MAX_RDMA_ATOMIC 16 - -/* - * This structure holds the information that the send tasklet needs - * to send a RDMA read response or atomic operation. - */ -struct rvt_ack_entry { - u8 opcode; - u8 sent; - u32 psn; - u32 lpsn; - union { - struct rvt_sge rdma_sge; - u64 atomic_data; - }; -}; - -struct rvt_sge_state { - struct rvt_sge *sg_list; /* next SGE to be used if any */ - struct rvt_sge sge; /* progress state for the current SGE */ - u32 total_len; - u8 num_sge; -}; - -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. - * - * Common variables are protected by both r_rq.lock and s_lock in that order - * which only happens in modify_qp() or changing the QP 'state'. 
- */ -struct rvt_qp { - struct ib_qp ibqp; - void *priv; /* Driver private data */ - /* read mostly fields above and below */ - struct ib_ah_attr remote_ah_attr; - struct ib_ah_attr alt_ah_attr; - struct rvt_qp __rcu *next; /* link list for QPN hash table */ - struct rvt_swqe *s_wq; /* send work queue */ - struct rvt_mmap_info *ip; - - unsigned long timeout_jiffies; /* computed from timeout */ - - enum ib_mtu path_mtu; - int srate_mbps; /* s_srate (below) converted to Mbit/s */ - u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu */ - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 s_ahgpsn; /* set to the psn in the copy of the header */ - - u8 state; /* QP state */ - u8 allowed_ops; /* high order bits of allowed opcodes */ - u8 qp_access_flags; - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 timeout; /* Timeout for this QP */ - u8 s_srate; - u8 s_mig_state; - u8 port_num; - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_draining; - - /* start of read/write fields */ - atomic_t refcount ____cacheline_aligned_in_smp; - wait_queue_head_t wait; - - struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; - struct rvt_sge_state s_rdma_read_sge; - - spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ - unsigned long r_aflags; - u64 r_wr_id; /* ID for current receive WQE */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ - - u8 r_state; /* opcode of last packet received */ - u8 r_flags; - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - - struct list_head rspwait; /* link for waiting to respond */ - - struct rvt_sge_state r_sge; /* current receive data */ - struct rvt_rq r_rq; /* receive work queue */ - - spinlock_t s_lock ____cacheline_aligned_in_smp; - struct rvt_sge_state *s_cur_sge; - u32 s_flags; - struct rvt_swqe *s_wqe; - struct rvt_sge_state s_sge; /* current send request data */ - struct rvt_mregion *s_rdma_mr; - struct sdma_engine *s_sde; /* current sde */ - u32 s_cur_size; /* size of send packet in bytes */ - u32 s_len; /* total length of s_sge */ - u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ - u32 s_last_psn; /* last response PSN processed */ - u32 s_sending_psn; /* lowest PSN that is being sent */ - u32 s_sending_hpsn; /* highest PSN that is being sent */ - u32 s_psn; /* current packet sequence number */ - u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ - u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_acked; /* last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ - u16 
s_rdma_ack_cnt; - s8 s_ahgidx; - u8 s_state; /* opcode of last packet sent */ - u8 s_ack_state; /* opcode of packet to ACK */ - u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_nak_state; /* non-zero if NAK is pending */ - u8 s_retry; /* requester retry counter */ - u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ - u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - - struct rvt_sge_state s_ack_rdma_sge; - struct timer_list s_timer; - - /* - * This sge list MUST be last. Do not add anything below here. - */ - struct rvt_sge r_sg_list[0] /* verified SGEs */ - ____cacheline_aligned_in_smp; -}; - -struct rvt_srq { - struct ib_srq ibsrq; - struct rvt_rq rq; - struct rvt_mmap_info *ip; - /* send signal when number of RWQEs < limit */ - u32 limit; -}; - -/* End QP section */ - struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -554,17 +262,6 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } -static inline void rvt_put_mr(struct rvt_mregion *mr) -{ - if (unlikely(atomic_dec_and_test(&mr->refcount))) - complete(&mr->comp); -} - -static inline void rvt_get_mr(struct rvt_mregion *mr) -{ - atomic_inc(&mr->refcount); -} - static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct rvt_srq, ibsrq); diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h new file mode 100644 index 0000000..ea60476 --- /dev/null +++ b/include/rdma/rdmavt_mr.h @@ -0,0 +1,130 @@ +#ifndef DEF_RDMAVT_INCMR_H +#define DEF_RDMAVT_INCMR_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page. */ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct rvt_mregion __rcu **table; +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +struct rvt_sge_state { + struct rvt_sge *sg_list; /* next SGE to be used if any */ + struct rvt_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +static inline void rvt_put_mr(struct rvt_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + complete(&mr->comp); +} + +static inline void rvt_get_mr(struct rvt_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + +#endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h new file mode 100644 index 0000000..f33fbb0 --- /dev/null +++ b/include/rdma/rdmavt_qp.h @@ -0,0 +1,262 @@ +#ifndef DEF_RDMAVT_INCQP_H +#define DEF_RDMAVT_INCQP_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. + */ +struct rvt_swqe { + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct rvt_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct rvt_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct rvt_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct rvt_rwqe wq[0]; +}; + +struct rvt_rq { + struct rvt_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; +}; + +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. 
+ */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +#define RVT_MAX_RDMA_ATOMIC 16 + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct rvt_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct rvt_sge rdma_sge; + u64 atomic_data; + }; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. + */ +struct rvt_qp { + struct ib_qp ibqp; + void *priv; /* Driver private data */ + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct rvt_qp __rcu *next; /* link list for QPN hash table */ + struct rvt_swqe *s_wq; /* send work queue */ + struct rvt_mmap_info *ip; + + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + int srate_mbps; /* s_srate (below) converted to Mbit/s */ + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + + u8 state; /* QP state */ + u8 allowed_ops; /* high order bits of allowed opcodes */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct rvt_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct rvt_sge_state r_sge; /* current receive data */ + struct rvt_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + struct rvt_sge_state *s_cur_sge; + u32 s_flags; + struct rvt_swqe *s_wqe; + struct rvt_sge_state s_sge; /* current send request data */ + struct rvt_mregion *s_rdma_mr; + struct sdma_engine *s_sde; /* current sde */ + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + 
u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 s_rdma_ack_cnt; + s8 s_ahgidx; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + struct rvt_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + + /* + * This sge list MUST be last. Do not add anything below here. + */ + struct rvt_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +struct rvt_srq { + struct ib_srq ibsrq; + struct rvt_rq rq; + struct rvt_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + +#endif /* DEF_RDMAVT_INCQP_H */ -- cgit v0.10.2 From 0acb0cc7ecc1e4860b056368566c0c2c254ae281 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:46 -0800 Subject: IB/rdmavt: Initialize and teardown of qpn table Add table init as well as teardown for handling qpn maps. Drivers can still provide this functionality by setting the QP_INIT_DRIVER bit. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 23a5f68..17dd6ab 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -45,8 +45,205 @@ * */ +#include +#include +#include "vt.h" #include "qp.h" +static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + + /* + * Free the page if someone raced with us installing it. + */ + + spin_lock(&qpt->lock); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock(&qpt->lock); +} + +/** + * init_qpn_table - initialize the QP number table for a device + * @qpt: the QPN table + */ +static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) +{ + u32 offset, i; + struct rvt_qpn_map *map; + int ret = 0; + + if (!(rdi->dparms.qpn_res_end > rdi->dparms.qpn_res_start)) + return -EINVAL; + + spin_lock_init(&qpt->lock); + + qpt->last = rdi->dparms.qpn_start; + qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift; + + /* + * Drivers may want some QPs beyond what we need for verbs let them use + * our qpn table. No need for two. Lets go ahead and mark the bitmaps + * for those. The reserved range must be *after* the range which verbs + * will pick from. 
+ */ + + /* Figure out number of bit maps needed before reserved range */ + qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE; + + /* This should always be zero */ + offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK; + + /* Starting with the first reserved bit map */ + map = &qpt->map[qpt->nmaps]; + + rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n", + rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); + for (i = rdi->dparms.qpn_res_start; i < rdi->dparms.qpn_res_end; i++) { + if (!map->page) { + get_map_page(qpt, map); + if (!map->page) { + ret = -ENOMEM; + break; + } + } + set_bit(offset, map->page); + offset++; + if (offset == RVT_BITS_PER_PAGE) { + /* next page */ + qpt->nmaps++; + map++; + offset = 0; + } + } + return ret; +} + +/** + * free_qpn_table - free the QP number table for a device + * @qpt: the QPN table + */ +static void free_qpn_table(struct rvt_qpn_table *qpt) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qpt->map); i++) + free_page((unsigned long)qpt->map[i].page); +} + +int rvt_driver_qp_init(struct rvt_dev_info *rdi) +{ + int i; + int ret = -ENOMEM; + + if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) { + rvt_pr_info(rdi, "Driver is doing QP init.\n"); + return 0; + } + + if (!rdi->dparms.qp_table_size) + return -EINVAL; + + /* + * If driver is not doing any QP allocation then make sure it is + * providing the necessary QP functions. + */ + if (!rdi->driver_f.free_all_qps) + return -EINVAL; + + /* allocate parent object */ + rdi->qp_dev = kzalloc(sizeof(*rdi->qp_dev), GFP_KERNEL); + if (!rdi->qp_dev) + return -ENOMEM; + + /* allocate hash table */ + rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; + rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); + rdi->qp_dev->qp_table = + kmalloc(rdi->qp_dev->qp_table_size * + sizeof(*rdi->qp_dev->qp_table), + GFP_KERNEL); + if (!rdi->qp_dev->qp_table) + goto no_qp_table; + + for (i = 0; i < rdi->qp_dev->qp_table_size; i++) + RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL); + + spin_lock_init(&rdi->qp_dev->qpt_lock); + + /* initialize qpn map */ + if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table)) + goto fail_table; + + return ret; + +fail_table: + kfree(rdi->qp_dev->qp_table); + free_qpn_table(&rdi->qp_dev->qpn_table); + +no_qp_table: + kfree(rdi->qp_dev); + + return ret; +} + +/** + * free_all_qps - check for QPs still in use + * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. + */ +static unsigned free_all_qps(struct rvt_dev_info *rdi) +{ + unsigned long flags; + struct rvt_qp *qp; + unsigned n, qp_inuse = 0; + spinlock_t *ql; /* work around too long line below */ + + rdi->driver_f.free_all_qps(rdi); + + if (!rdi->qp_dev) + return 0; + + ql = &rdi->qp_dev->qpt_lock; + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { + qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], + lockdep_is_held(ql)); + RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); + qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql)); + while (qp) { + qp_inuse++; + qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql)); + } + } + spin_unlock_irqrestore(ql, flags); + synchronize_rcu(); + return qp_inuse; +} + +void rvt_qp_exit(struct rvt_dev_info *rdi) +{ + u32 qps_inuse = free_all_qps(rdi); + + qps_inuse = free_all_qps(rdi); + if (qps_inuse) + rvt_pr_err(rdi, "QP memory leak! 
%u still in use\n", + qps_inuse); + if (!rdi->qp_dev) + return; + + kfree(rdi->qp_dev->qp_table); + free_qpn_table(&rdi->qp_dev->qpn_table); + kfree(rdi->qp_dev); +} + /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 9c2999d..f438809 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -50,6 +50,8 @@ #include +int rvt_driver_qp_init(struct rvt_dev_info *rdi); +void rvt_qp_exit(struct rvt_dev_info *rdi); struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 44de280..f2d995d 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -223,9 +223,23 @@ int rvt_register_device(struct rvt_dev_info *rdi) (!rdi->driver_f.get_card_name) || (!rdi->driver_f.get_pci_dev) || (!rdi->driver_f.check_ah)) { + pr_err("Driver not supporting req func\n"); return -EINVAL; } + if (!rdi->dparms.nports) { + rvt_pr_err(rdi, "Driver says it has no ports.\n"); + return -EINVAL; + } + + rdi->ports = kcalloc(rdi->dparms.nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) { + rvt_pr_err(rdi, "Could not allocate port mem.\n"); + return -ENOMEM; + } + /* Once we get past here we can use the rvt_pr macros */ /* Dev Ops */ @@ -240,6 +254,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, get_port_immutable); /* Queue Pairs */ + ret = rvt_driver_qp_init(rdi); + if (ret) { + pr_err("Error in driver QP init.\n"); + return -EINVAL; + } + CHECK_DRIVER_OVERRIDE(rdi, create_qp); CHECK_DRIVER_OVERRIDE(rdi, modify_qp); CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); @@ -300,19 +320,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; - if (rdi->dparms.nports) { - rdi->ports = kcalloc(rdi->dparms.nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); - if (!rdi->ports) { - rvt_pr_err(rdi, "Could not allocate port mem.\n"); - ret = -ENOMEM; - goto bail_mr; - } - } else { - rvt_pr_warn(rdi, "Driver says it has no ports.\n"); - } - /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { @@ -327,6 +334,8 @@ bail_mr: rvt_mr_exit(rdi); bail_no_mr: + rvt_qp_exit(rdi); + return ret; } EXPORT_SYMBOL(rvt_register_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79da8ee..950c291 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -172,7 +172,13 @@ struct rvt_driver_params { * For instance special module parameters. Goes here. 
*/ unsigned int lkey_table_size; + unsigned int qp_table_size; + int qpn_start; + int qpn_inc; + int qpn_res_start; + int qpn_res_end; int nports; + u8 qos_shift; }; /* Protection domain */ @@ -205,6 +211,7 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + void (*free_all_qps)(struct rvt_dev_info *rdi); /*--------------------*/ /* Optional functions */ @@ -245,6 +252,8 @@ struct rvt_dev_info { int flags; struct rvt_ibport **ports; + + struct rvt_qp_ibdev *qp_dev; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f33fbb0..e6a7d17 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -259,4 +259,37 @@ struct rvt_srq { u32 limit; }; +#define RVT_QPN_MAX BIT(24) +#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) + +/* + * QPN-map pages start out as NULL, they get allocated upon + * first use and are never deallocated. This way, + * large bitmaps are not allocated unless large numbers of QPs are used. + */ +struct rvt_qpn_map { + void *page; +}; + +struct rvt_qpn_table { + spinlock_t lock; /* protect changes to the qp table */ + unsigned flags; /* flags for QP0/1 allocated for each port */ + u32 last; /* last QP number allocated */ + u32 nmaps; /* size of the map table */ + u16 limit; + u8 incr; + /* bit map of free QP numbers other than 0/1 */ + struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; +}; + +struct rvt_qp_ibdev { + u32 qp_table_size; + u32 qp_table_bits; + struct rvt_qp __rcu **qp_table; + spinlock_t qpt_lock; /* qptable lock */ + struct rvt_qpn_table qpn_table; +}; + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v0.10.2 From 822514d75a9647662fff39d728c1f4636b75d904 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:57 -0800 Subject: IB/rdmavt: Add mmap related functions The mmap data structure was moved in a previous commit. This patch now pulls in the related functions. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index d09f3a05..fc30ff7 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -46,8 +46,61 @@ */ #include +#include +#include +#include #include "mmap.h" +void rvt_mmap_init(struct rvt_dev_info *rdi) +{ + INIT_LIST_HEAD(&rdi->pending_mmaps); + spin_lock_init(&rdi->pending_lock); + rdi->mmap_offset = PAGE_SIZE; + spin_lock_init(&rdi->mmap_offset_lock); +} + +/** + * rvt_release_mmap_info - free mmap info structure + * @ref: a pointer to the kref within struct rvt_mmap_info + */ +void rvt_release_mmap_info(struct kref *ref) +{ + struct rvt_mmap_info *ip = + container_of(ref, struct rvt_mmap_info, ref); + struct rvt_dev_info *rdi = ib_to_rvt(ip->context->device); + + spin_lock_irq(&rdi->pending_lock); + list_del(&ip->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + + vfree(ip->obj); + kfree(ip); +} +EXPORT_SYMBOL(rvt_release_mmap_info); + +/* + * open and close keep track of how many times the CQ is mapped, + * to avoid releasing it. 
+ */ +static void rvt_vma_open(struct vm_area_struct *vma) +{ + struct rvt_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); +} + +static void rvt_vma_close(struct vm_area_struct *vma) +{ + struct rvt_mmap_info *ip = vma->vm_private_data; + + kref_put(&ip->ref, rvt_release_mmap_info); +} + +static const struct vm_operations_struct rvt_vm_ops = { + .open = rvt_vma_open, + .close = rvt_vma_close, +}; + /** * rvt_mmap - create a new mmap region * @context: the IB user context of the process making the mmap() call @@ -56,5 +109,90 @@ */ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { - return -EOPNOTSUPP; + struct rvt_dev_info *rdi = ib_to_rvt(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct rvt_mmap_info *ip, *pp; + int ret = -EINVAL; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_irq(&rdi->pending_lock); + list_for_each_entry_safe(ip, pp, &rdi->pending_mmaps, + pending_mmaps) { + /* Only the creator is allowed to mmap the object */ + if (context != ip->context || (__u64)offset != ip->offset) + continue; + /* Don't allow a mmap larger than the object. */ + if (size > ip->size) + break; + + list_del_init(&ip->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) + goto done; + vma->vm_ops = &rvt_vm_ops; + vma->vm_private_data = ip; + rvt_vma_open(vma); + goto done; + } + spin_unlock_irq(&rdi->pending_lock); +done: + return ret; +} +EXPORT_SYMBOL(rvt_mmap); + +/* + * Allocate information for hfi1_mmap + */ +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, + u32 size, + struct ib_ucontext *context, + void *obj) +{ + struct rvt_mmap_info *ip; + + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) + return ip; + + size = PAGE_ALIGN(size); + + spin_lock_irq(&rdi->mmap_offset_lock); + if (rdi->mmap_offset == 0) + rdi->mmap_offset = PAGE_SIZE; + ip->offset = rdi->mmap_offset; + rdi->mmap_offset += size; + spin_unlock_irq(&rdi->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + + return ip; +} +EXPORT_SYMBOL(rvt_create_mmap_info); + +void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, + u32 size, void *obj) +{ + size = PAGE_ALIGN(size); + + spin_lock_irq(&rdi->mmap_offset_lock); + if (rdi->mmap_offset == 0) + rdi->mmap_offset = PAGE_SIZE; + ip->offset = rdi->mmap_offset; + rdi->mmap_offset += size; + spin_unlock_irq(&rdi->mmap_offset_lock); + + ip->size = size; + ip->obj = obj; } +EXPORT_SYMBOL(rvt_update_mmap_info); diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index 94f6377..3513e25 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -50,6 +50,6 @@ #include -int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +void rvt_mmap_init(struct rvt_dev_info *rdi); #endif /* DEF_RDMAVTMMAP_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index f2d995d..ab4105a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -241,6 +241,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) } /* Once we get past here we can use the rvt_pr macros */ + rvt_mmap_init(rdi); /* 
Dev Ops */ CHECK_DRIVER_OVERRIDE(rdi, query_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 950c291..fd25d23 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -254,6 +254,12 @@ struct rvt_dev_info { struct rvt_ibport **ports; struct rvt_qp_ibdev *qp_dev; + + /* memory maps */ + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; /* protect mmap_offset */ + u32 mmap_offset; + spinlock_t pending_lock; /* protect pending mmap list */ }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -285,4 +291,13 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); +int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +void rvt_release_mmap_info(struct kref *ref); +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, + u32 size, + struct ib_ucontext *context, + void *obj); +void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, + u32 size, void *obj); + #endif /* DEF_RDMA_VT_H */ -- cgit v0.10.2 From 38ce2c6f3ae8dda0ee42dc8474759ff949994bea Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:05:12 -0800 Subject: IB/rdmavt: Add pkey support Add pkey table in rdi per port data structure. Also bring in related pkey functions. Drivers will still be responsible for allocating and maintaining the pkey table. However they need to tell rdmavt where to find the pkey table. We can not move the pkey table up into rdmavt because drivers need to manipulate this long before registering with it. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index ab4105a..18b5f43 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -154,6 +154,17 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, * lock, if a stale value is read and sent to the user so be it there is * no way to protect against that anyway. */ + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + int port_index; + + if (index >= rvt_get_npkeys(rdi)) + return -EINVAL; + + port_index = port - 1; /* IB ports start at 1 our array at 0 */ + if ((port_index < 0) || (port_index >= rdi->dparms.nports)) + return -EINVAL; + + *pkey = rvt_get_pkey(rdi, port_index, index); return 0; } @@ -227,19 +238,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) return -EINVAL; } - if (!rdi->dparms.nports) { - rvt_pr_err(rdi, "Driver says it has no ports.\n"); - return -EINVAL; - } - - rdi->ports = kcalloc(rdi->dparms.nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); - if (!rdi->ports) { - rvt_pr_err(rdi, "Could not allocate port mem.\n"); - return -ENOMEM; - } - /* Once we get past here we can use the rvt_pr macros */ rvt_mmap_init(rdi); @@ -355,9 +353,25 @@ EXPORT_SYMBOL(rvt_unregister_device); * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away. 
*/ -void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum) +int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum, u16 *pkey_table) { + if (!rdi->dparms.nports) { + rvt_pr_err(rdi, "Driver says it has no ports.\n"); + return -EINVAL; + } + + rdi->ports = kcalloc(rdi->dparms.nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) { + rvt_pr_err(rdi, "Could not allocate port mem.\n"); + return -ENOMEM; + } + rdi->ports[portnum] = port; + rdi->ports[portnum]->pkey_table = pkey_table; + + return 0; } -EXPORT_SYMBOL(rvt_attach_port); +EXPORT_SYMBOL(rvt_init_port); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fd25d23..3a78f20 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -69,6 +69,8 @@ #define RVT_FLAG_QP_INIT_DRIVER BIT(2) #define RVT_FLAG_CQ_INIT_DRIVER BIT(3) +#define RVT_MAX_PKEY_VALUES 16 + struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -125,6 +127,14 @@ struct rvt_ibport { void *priv; /* driver private data */ + /* + * The pkey table is allocated and maintained by the driver. Drivers + * need to have access to this before registering with rdmav. However + * rdmavt will need access to it so drivers need to proviee this during + * the attach port API call. + */ + u16 *pkey_table; + /* TODO: Move sm_ah and smi_ah into here as well*/ }; @@ -178,6 +188,7 @@ struct rvt_driver_params { int qpn_res_start; int qpn_res_end; int nports; + int npkeys; u8 qos_shift; }; @@ -238,8 +249,6 @@ struct rvt_dev_info { struct rvt_mregion __rcu *dma_mr; struct rvt_lkey_table lkey_table; - /* PKey Table goes here */ - /* Driver specific helper functions */ struct rvt_driver_provided driver_f; @@ -282,11 +291,32 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct rvt_srq, ibsrq); } +static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) +{ + /* + * All ports have same number of pkeys. + */ + return rdi->dparms.npkeys; +} + +/* + * Return the indexed PKEY from the port PKEY table. + */ +static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, + int port_index, + unsigned index) +{ + if (index >= rvt_get_npkeys(rdi)) + return 0; + else + return rdi->ports[port_index]->pkey_table[index]; +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum); +int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum, u16 *pkey_table); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v0.10.2 From 2dc05ab57f9fc28e9aa3f9eba1cd0b430a832d2c Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:44:29 -0800 Subject: IB/qib: Begin to use rdmavt for verbs This patch begins to make use of rdmavt by registering with it and providing access to the header files. This is just the beginning of rdmavt support in qib. Most functionality is still being done in the driver, set flags so that rdmavt will let qib continue to handle mr, qp, and cq init. 
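In outline, the registration path this patch introduces looks like the condensed sketch below (taken from the qib_verbs.c hunk later in this patch, with error handling trimmed); the flags are what tell rdmavt to leave MR, QP and CQ setup to qib for now:

    /* Fill in the rvt_dev_info embedded in the qib device, then register
     * with rdmavt instead of calling ib_register_device() directly. */
    dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
    dd->verbs_dev.rdi.dparms.props.max_pd = ib_qib_max_pds;
    dd->verbs_dev.rdi.flags = (RVT_FLAG_MR_INIT_DRIVER |
                               RVT_FLAG_QP_INIT_DRIVER |
                               RVT_FLAG_CQ_INIT_DRIVER);

    ret = rvt_register_device(&dd->verbs_dev.rdi);
    if (ret)
        goto err_reg;

On teardown and in the error path the driver correspondingly calls rvt_unregister_device(&dd->verbs_dev.rdi), as the later hunks show.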
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 495be09..e0fdb92 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_QIB tristate "Intel PCIe HCA support" - depends on 64BIT + depends on 64BIT && INFINIBAND_RDMAVT ---help--- This is a low-level driver for Intel PCIe QLE InfiniBand host channel adapters. This driver does not support the Intel diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 7df16f7..0e68e1f 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -52,6 +52,7 @@ #include #include #include +#include #include "qib_common.h" #include "qib_verbs.h" diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 4ff340f..47190f1 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -42,6 +42,7 @@ #ifdef CONFIG_INFINIBAND_QIB_DCA #include #endif +#include #include "qib.h" #include "qib_common.h" @@ -1081,7 +1082,7 @@ void qib_free_devdata(struct qib_devdata *dd) qib_dbg_ibdev_exit(&dd->verbs_dev); #endif free_percpu(dd->int_counter); - ib_dealloc_device(&dd->verbs_dev.ibdev); + ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); } u64 qib_int_counter(struct qib_devdata *dd) @@ -1171,7 +1172,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) bail: if (!list_empty(&dd->list)) list_del_init(&dd->list); - ib_dealloc_device(&dd->verbs_dev.ibdev); + ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); return ERR_PTR(ret); } diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 086616d..a014fd4 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -74,7 +74,7 @@ static void signal_ib_event(struct qib_pportdata *ppd, enum ib_event_type ev) struct ib_event event; struct qib_devdata *dd = ppd->dd; - event.device = &dd->verbs_dev.ibdev; + event.device = &dd->verbs_dev.rdi.ibdev; event.element.port_num = ppd->port; event.event = ev; ib_dispatch_event(&event); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 9625e7c..c65d3aa 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0); event.event = IB_EVENT_PKEY_CHANGE; - event.device = &dd->verbs_dev.ibdev; + event.device = &dd->verbs_dev.rdi.ibdev; event.element.port_num = port; ib_dispatch_event(&event); } @@ -2483,7 +2483,8 @@ int qib_create_agents(struct qib_ibdev *dev) for (p = 0; p < dd->num_pports; p++) { ibp = &dd->pport[p].ibport_data; - agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI, + agent = ib_register_mad_agent(&dev->rdi.ibdev, p + 1, + IB_QPT_SMI, NULL, 0, send_handler, NULL, NULL, 0); if (IS_ERR(agent)) { diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 81f56cd..72a160e 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -502,7 +502,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); 
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); } @@ -511,7 +511,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); struct qib_devdata *dd = dd_from_dev(dev); int ret; @@ -533,7 +533,7 @@ static ssize_t show_boardversion(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -545,7 +545,7 @@ static ssize_t show_localbus_info(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -557,7 +557,7 @@ static ssize_t show_nctxts(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of user ports (contexts) available. */ @@ -572,7 +572,7 @@ static ssize_t show_nfreectxts(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ @@ -583,7 +583,7 @@ static ssize_t show_serial(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); struct qib_devdata *dd = dd_from_dev(dev); buf[sizeof(dd->serial)] = '\0'; @@ -597,7 +597,7 @@ static ssize_t store_chip_reset(struct device *device, size_t count) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); struct qib_devdata *dd = dd_from_dev(dev); int ret; @@ -618,7 +618,7 @@ static ssize_t show_tempsense(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, ibdev.dev); + container_of(device, struct qib_ibdev, rdi.ibdev.dev); struct qib_devdata *dd = dd_from_dev(dev); int ret; int idx; @@ -778,7 +778,7 @@ bail: */ int qib_verbs_register_sysfs(struct qib_devdata *dd) { - struct ib_device *dev = &dd->verbs_dev.ibdev; + struct ib_device *dev = &dd->verbs_dev.rdi.ibdev; int i, ret; for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) { diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index baf1e42..f8975ea 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -2091,7 +2091,7 @@ static int qib_port_immutable(struct ib_device *ibdev, u8 port_num, int qib_register_ib_device(struct qib_devdata *dd) { struct qib_ibdev *dev = &dd->verbs_dev; - struct ib_device *ibdev = &dev->ibdev; + struct ib_device *ibdev = &dev->rdi.ibdev; struct qib_pportdata *ppd = dd->pport; unsigned i, lk_tab_size; int ret; @@ -2279,7 +2279,17 @@ int qib_register_ib_device(struct qib_devdata *dd) snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); - ret = ib_register_device(ibdev, qib_create_port_files); + /* + * Fill in rvt info object. 
+ */ + dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files; + dd->verbs_dev.rdi.dparms.props.max_pd = ib_qib_max_pds; + dd->verbs_dev.rdi.flags = (RVT_FLAG_MR_INIT_DRIVER | + RVT_FLAG_QP_INIT_DRIVER | + RVT_FLAG_CQ_INIT_DRIVER); + + + ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) goto err_reg; @@ -2296,7 +2306,7 @@ int qib_register_ib_device(struct qib_devdata *dd) err_class: qib_free_agents(dev); err_agents: - ib_unregister_device(ibdev); + rvt_unregister_device(&dd->verbs_dev.rdi); err_reg: err_tx: while (!list_empty(&dev->txreq_free)) { @@ -2325,7 +2335,6 @@ bail: void qib_unregister_ib_device(struct qib_devdata *dd) { struct qib_ibdev *dev = &dd->verbs_dev; - struct ib_device *ibdev = &dev->ibdev; u32 qps_inuse; unsigned lk_tab_size; @@ -2333,7 +2342,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) qib_free_agents(dev); - ib_unregister_device(ibdev); + rvt_unregister_device(&dd->verbs_dev.rdi); if (!list_empty(&dev->piowait)) qib_dev_err(dd, "piowait list not empty!\n"); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 6c5e777..e175301 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -45,6 +45,7 @@ #include #include #include +#include struct qib_ctxtdata; struct qib_pportdata; @@ -752,7 +753,7 @@ struct qib_ibport { struct qib_ibdev { - struct ib_device ibdev; + struct rvt_dev_info rdi; struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; @@ -845,7 +846,10 @@ static inline struct qib_qp *to_iqp(struct ib_qp *ibqp) static inline struct qib_ibdev *to_idev(struct ib_device *ibdev) { - return container_of(ibdev, struct qib_ibdev, ibdev); + struct rvt_dev_info *rdi; + + rdi = container_of(ibdev, struct rvt_dev_info, ibdev); + return container_of(rdi, struct qib_ibdev, rdi); } /* -- cgit v0.10.2 From eb636ac0e49ec560a608336aa6e02b7eca482112 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:44:36 -0800 Subject: IB/qib: Remove dma.c and use rdmavt version of dma functions This patch removes the qib_dma.c file and uses the version which has been added to rdmavt. 
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index 57f8103..8a8f892 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o -ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \ +ib_qib-y := qib_cq.o qib_diag.o qib_driver.o qib_eeprom.o \ qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \ qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \ qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index f8975ea..ae3c661 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "qib.h" #include "qib_common.h" @@ -2273,7 +2274,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->detach_mcast = qib_multicast_detach; ibdev->process_mad = qib_process_mad; ibdev->mmap = qib_mmap; - ibdev->dma_ops = &qib_dma_mapping_ops; + ibdev->dma_ops = NULL; ibdev->get_port_immutable = qib_port_immutable; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index e175301..8ac0724 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1182,6 +1182,4 @@ extern unsigned int ib_qib_max_srq_wrs; extern const u32 ib_qib_rnr_table[]; -extern struct ib_dma_mapping_ops qib_dma_mapping_ops; - #endif /* QIB_VERBS_H */ -- cgit v0.10.2 From f44728d69a8ac8552fe3f0c8aa898014fa6f3b9c Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:44:44 -0800 Subject: IB/qib: Use rdmavt protection domain Remove protection domain datastructure from qib and use rdmavts version. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index d725c56..04fa272 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -152,7 +152,7 @@ out: * Check the IB SGE for validity and initialize our internal version * of it. 
*/ -int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, +int qib_lkey_ok(struct qib_lkey_table *rkt, struct rvt_pd *pd, struct qib_sge *isge, struct ib_sge *sge, int acc) { struct qib_mregion *mr; @@ -263,7 +263,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, */ rcu_read_lock(); if (rkey == 0) { - struct qib_pd *pd = to_ipd(qp->ibqp.pd); + struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd); struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) @@ -341,7 +341,7 @@ bail: int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr) { struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct qib_pd *pd = to_ipd(qp->ibqp.pd); + struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd); struct qib_mr *mr = to_imr(wr->mr); struct qib_mregion *mrg; u32 key = wr->key; diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 5f53304..9d84e0d 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -99,7 +99,7 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) struct ib_mr *ret; int rval; - if (to_ipd(pd)->user) { + if (ibpd_to_rvtpd(pd)->user) { ret = ERR_PTR(-EPERM); goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index b1aa21b..425c8c2 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -84,11 +84,11 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe) int i, j, ret; struct ib_wc wc; struct qib_lkey_table *rkt; - struct qib_pd *pd; + struct rvt_pd *pd; struct qib_sge_state *ss; rkt = &to_idev(qp->ibqp.device)->lk_table; - pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); + pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); ss = &qp->r_sge; ss->sg_list = qp->r_sg_list; qp->r_len = 0; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index ae3c661..c742b0d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -346,7 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, int ret; unsigned long flags; struct qib_lkey_table *rkt; - struct qib_pd *pd; + struct rvt_pd *pd; int avoid_schedule = 0; spin_lock_irqsave(&qp->s_lock, flags); @@ -397,7 +397,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, } rkt = &to_idev(qp->ibqp.device)->lk_table; - pd = to_ipd(qp->ibqp.pd); + pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); if (qp->ibqp.qp_type != IB_QPT_UC && @@ -1604,7 +1604,7 @@ static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *prop props->max_mr = dev->lk_table.max; props->max_fmr = dev->lk_table.max; props->max_map_per_fmr = 32767; - props->max_pd = ib_qib_max_pds; + props->max_pd = dev->rdi.dparms.props.max_pd; props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; props->max_qp_init_rd_atom = 255; /* props->max_res_rd_atom */ @@ -1756,61 +1756,6 @@ static int qib_query_gid(struct ib_device *ibdev, u8 port, return ret; } -static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - struct qib_ibdev *dev = to_idev(ibdev); - struct qib_pd *pd; - struct ib_pd *ret; - - /* - * This is actually totally arbitrary. Some correctness tests - * assume there's a maximum number of PDs that can be allocated. - * We don't actually have this limit, but we fail the test if - * we allow allocations of more than we report for this value. 
- */ - - pd = kmalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - spin_lock(&dev->n_pds_lock); - if (dev->n_pds_allocated == ib_qib_max_pds) { - spin_unlock(&dev->n_pds_lock); - kfree(pd); - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - dev->n_pds_allocated++; - spin_unlock(&dev->n_pds_lock); - - /* ib_alloc_pd() will initialize pd->ibpd. */ - pd->user = udata != NULL; - - ret = &pd->ibpd; - -bail: - return ret; -} - -static int qib_dealloc_pd(struct ib_pd *ibpd) -{ - struct qib_pd *pd = to_ipd(ibpd); - struct qib_ibdev *dev = to_idev(ibpd->device); - - spin_lock(&dev->n_pds_lock); - dev->n_pds_allocated--; - spin_unlock(&dev->n_pds_lock); - - kfree(pd); - - return 0; -} - int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) { /* A multicast address requires a GRH (see ch. 8.4.1). */ @@ -2115,7 +2060,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* Only need to initialize non-zero fields. */ spin_lock_init(&dev->qpt_lock); - spin_lock_init(&dev->n_pds_lock); spin_lock_init(&dev->n_ahs_lock); spin_lock_init(&dev->n_cqs_lock); spin_lock_init(&dev->n_qps_lock); @@ -2239,8 +2183,8 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->query_gid = qib_query_gid; ibdev->alloc_ucontext = qib_alloc_ucontext; ibdev->dealloc_ucontext = qib_dealloc_ucontext; - ibdev->alloc_pd = qib_alloc_pd; - ibdev->dealloc_pd = qib_dealloc_pd; + ibdev->alloc_pd = NULL; + ibdev->dealloc_pd = NULL; ibdev->create_ah = qib_create_ah; ibdev->destroy_ah = qib_destroy_ah; ibdev->modify_ah = qib_modify_ah; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 8ac0724..f1ca5d1 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -222,12 +222,6 @@ struct qib_mcast { int n_attached; }; -/* Protection domain */ -struct qib_pd { - struct ib_pd ibpd; - int user; /* non-zero if created from user space */ -}; - /* Address Handle */ struct qib_ah { struct ib_ah ibah; @@ -819,11 +813,6 @@ static inline struct qib_mr *to_imr(struct ib_mr *ibmr) return container_of(ibmr, struct qib_mr, ibmr); } -static inline struct qib_pd *to_ipd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct qib_pd, ibpd); -} - static inline struct qib_ah *to_iah(struct ib_ah *ibah) { return container_of(ibah, struct qib_ah, ibah); @@ -994,7 +983,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); void qib_free_lkey(struct qib_mregion *mr); -int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, +int qib_lkey_ok(struct qib_lkey_table *rkt, struct rvt_pd *pd, struct qib_sge *isge, struct ib_sge *sge, int acc); int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, -- cgit v0.10.2 From 9ff198f5f2c251fc33dab45a7fc1b79c138d51b5 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:44:53 -0800 Subject: IB/qib: Remove most uses of QIB_PERMISSIVE_LID and QIB_MULTICAST_LID_BASE This patch removes most of the uses of QIB_PERMISSIBVE_LID and QIB_MULTICAST_LID_BASE in favor of the recently added IB_* versions. There are still minor uses in AH functions as well as the QIB_* defines but those will be removed in a follow on patch. 
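Note that the common IB_MULTICAST_LID_BASE and IB_LID_PERMISSIVE constants are big-endian (__be16) values, so every converted comparison wraps them in be16_to_cpu() before comparing against a host-order LID. The receive-path check from qib_driver.c in this patch is representative:

    /* Unicast LIDs must match the port LID once the LMC bits are masked. */
    if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
        lid &= ~((1 << ppd->lmc) - 1);
        if (unlikely(lid != ppd->lid))
            goto drop;
    }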
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index f58fdc3..57b19ead 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -319,7 +319,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, if (tlen < 24) goto drop; - if (lid < QIB_MULTICAST_LID_BASE) { + if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { lid &= ~((1 << ppd->lmc) - 1); if (unlikely(lid != ppd->lid)) goto drop; diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index c65d3aa..70fc1b2 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -691,7 +691,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, lid = be16_to_cpu(pip->lid); /* Must be a valid unicast LID address. */ - if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE) + if (lid == 0 || lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) smp->status |= IB_SMP_INVALID_FIELD; else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) { if (ppd->lid != lid) @@ -706,7 +706,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, smlid = be16_to_cpu(pip->sm_lid); msl = pip->neighbormtu_mastersmsl & 0xF; /* Must be a valid unicast LID address. */ - if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE) + if (smlid == 0 || smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) smp->status |= IB_SMP_INVALID_FIELD; else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) { spin_lock_irqsave(&ibp->lock, flags); diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 3eff35c..e8beeee 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -590,14 +590,15 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto inval; if (attr_mask & IB_QP_AV) { - if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE) + if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; if (qib_check_ah(qp->ibqp.device, &attr->ah_attr)) goto inval; } if (attr_mask & IB_QP_ALT_PATH) { - if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE) + if (attr->alt_ah_attr.dlid >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) goto inval; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 59193f6..32fc80c 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -32,6 +32,7 @@ */ #include +#include #include "qib.h" #include "qib_mad.h" @@ -278,8 +279,8 @@ int qib_make_ud_req(struct qib_qp *qp) ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &to_iah(wqe->ud_wr.ah)->attr; - if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { - if (ah_attr->dlid != QIB_PERMISSIVE_LID) + if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + if (ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE)) this_cpu_inc(ibp->pmastats->n_multicast_xmit); else this_cpu_inc(ibp->pmastats->n_unicast_xmit); @@ -368,8 +369,8 @@ int qib_make_ud_req(struct qib_qp *qp) /* * Use the multicast QP if the destination LID is a multicast LID. */ - ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE && - ah_attr->dlid != QIB_PERMISSIVE_LID ? + ohdr->bth[1] = ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) && + ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ? 
cpu_to_be32(QIB_MULTICAST_QPN) : cpu_to_be32(wqe->ud_wr.remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); @@ -576,7 +577,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, /* * Save the LMC lower bits if the destination LID is a unicast LID. */ - wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 : + wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 : dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index c742b0d..84c828e 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -646,7 +646,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) /* Check for a valid destination LID (see ch. 7.11.1). */ lid = be16_to_cpu(hdr->lrh[1]); - if (lid < QIB_MULTICAST_LID_BASE) { + if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { lid &= ~((1 << ppd->lmc) - 1); if (unlikely(lid != ppd->lid)) goto drop; -- cgit v0.10.2 From 869a2a964afdf540246f656e018986b96edf5e57 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:45:02 -0800 Subject: IB/qib: Use rdmavt lid defines in qib Original patch for AH changes from Kamal Heib , split apart from original. This patch also removes the qib specific multicast lid base and permissive lid defines since they are no longer needed. Use common LID defines in qib driver. Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index 4fb78ab..1d87ec0 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -742,14 +742,12 @@ struct qib_tid_session_member { #define SIZE_OF_CRC 1 #define QIB_DEFAULT_P_KEY 0xFFFF -#define QIB_PERMISSIVE_LID 0xFFFF #define QIB_AETH_CREDIT_SHIFT 24 #define QIB_AETH_CREDIT_MASK 0x1F #define QIB_AETH_CREDIT_INVAL 0x1F #define QIB_PSN_MASK 0xFFFFFF #define QIB_MSN_MASK 0xFFFFFF #define QIB_QPN_MASK 0xFFFFFF -#define QIB_MULTICAST_LID_BASE 0xC000 #define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK #define QIB_MULTICAST_QPN 0xFFFFFF diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index e8beeee..6c17c90 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -35,6 +35,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_FS #include #endif diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 84c828e..3bfa1a6 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1759,8 +1759,8 @@ static int qib_query_gid(struct ib_device *ibdev, u8 port, int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) { /* A multicast address requires a GRH (see ch. 8.4.1). 
*/ - if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE && - ah_attr->dlid != QIB_PERMISSIVE_LID && + if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) && + ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) && !(ah_attr->ah_flags & IB_AH_GRH)) goto bail; if ((ah_attr->ah_flags & IB_AH_GRH) && -- cgit v0.10.2 From ffc269075bce7dd895d1fc8eca6367573df0ddbc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:45:11 -0800 Subject: IB/qib: Remove driver specific members from qib qp type In preparation for moving the queue pair data structure to rdmavt the members of the driver specific queue pairs which are not common need to be pushed off to a private driver structure. This structure will be available in the queue pair once moved to rdmavt as a void pointer. This patch while not adding a lot of value in and of itself is a prerequisite to move the queue pair out of the drivers and into rdmavt. The driver specific, private queue pair data structure should condense as more of the send side code moves to rdmavt. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 6c17c90..aaa1cf9 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -371,10 +371,11 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) */ static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type) { + struct qib_qp_priv *priv = qp->priv; qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; - atomic_set(&qp->s_dma_busy, 0); + atomic_set(&priv->s_dma_busy, 0); qp->s_flags &= QIB_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; @@ -474,6 +475,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) */ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) { + struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; int ret = 0; @@ -492,9 +494,9 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) qp->s_flags &= ~QIB_S_ANY_WAIT_SEND; spin_lock(&dev->pending_lock); - if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) { + if (!list_empty(&priv->iowait) && !(qp->s_flags & QIB_S_BUSY)) { qp->s_flags &= ~QIB_S_ANY_WAIT_IO; - list_del_init(&qp->iowait); + list_del_init(&priv->iowait); } spin_unlock(&dev->pending_lock); @@ -504,9 +506,9 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } - if (qp->s_tx) { - qib_put_txreq(qp->s_tx); - qp->s_tx = NULL; + if (priv->s_tx) { + qib_put_txreq(priv->s_tx); + priv->s_tx = NULL; } } @@ -572,6 +574,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_qp *qp = to_iqp(ibqp); + struct qib_qp_priv *priv = qp->priv; enum ib_qp_state cur_state, new_state; struct ib_event ev; int lastwqe = 0; @@ -699,19 +702,20 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; spin_lock(&dev->pending_lock); - if (!list_empty(&qp->iowait)) - list_del_init(&qp->iowait); + if (!list_empty(&priv->iowait)) + list_del_init(&priv->iowait); spin_unlock(&dev->pending_lock); qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); /* Stop the sending work queue and retry timer */ - cancel_work_sync(&qp->s_work); + cancel_work_sync(&priv->s_work); 
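The private structure introduced here, and the way code reaches it, are sketched below (condensed from the qib_verbs.h and qib_ruc.c hunks in this patch):

    /* qib-specific send-side state split out of struct qib_qp; once the QP
     * moves to rdmavt this will hang off the QP as an opaque priv pointer. */
    struct qib_qp_priv {
        struct qib_ib_header *s_hdr;   /* next packet header to send */
        struct list_head iowait;       /* link for wait PIO buf */
        atomic_t s_dma_busy;
        struct qib_verbs_txreq *s_tx;
        struct work_struct s_work;
        wait_queue_head_t wait_dma;
        struct qib_qp *owner;          /* back-pointer to the owning QP */
    };

    /* Callers now hop through qp->priv; the send work handler, for example,
     * recovers the QP from the embedded work_struct via the owner field. */
    struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv, s_work);
    struct qib_qp *qp = priv->owner;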
del_timer_sync(&qp->s_timer); - wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); - if (qp->s_tx) { - qib_put_txreq(qp->s_tx); - qp->s_tx = NULL; + wait_event(priv->wait_dma, + !atomic_read(&priv->s_dma_busy)); + if (priv->s_tx) { + qib_put_txreq(priv->s_tx); + priv->s_tx = NULL; } remove_qp(dev, qp); wait_event(qp->wait, !atomic_read(&qp->refcount)); @@ -987,7 +991,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, size_t sg_list_sz; struct ib_qp *ret; gfp_t gfp; - + struct qib_qp_priv *priv; if (init_attr->cap.max_send_sge > ib_qib_max_sges || init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || @@ -1055,11 +1059,18 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); - qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp); - if (!qp->s_hdr) { + priv = kzalloc(sizeof(*priv), gfp); + if (!priv) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp_hdr; + } + priv->owner = qp; + priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp); + if (!priv->s_hdr) { ret = ERR_PTR(-ENOMEM); goto bail_qp; } + qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); @@ -1095,11 +1106,11 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); - init_waitqueue_head(&qp->wait_dma); + init_waitqueue_head(&priv->wait_dma); init_timer(&qp->s_timer); qp->s_timer.data = (unsigned long)qp; - INIT_WORK(&qp->s_work, qib_do_send); - INIT_LIST_HEAD(&qp->iowait); + INIT_WORK(&priv->s_work, qib_do_send); + INIT_LIST_HEAD(&priv->iowait); INIT_LIST_HEAD(&qp->rspwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; @@ -1189,7 +1200,9 @@ bail_ip: vfree(qp->r_rq.wq); free_qpn(&dev->qpn_table, qp->ibqp.qp_num); bail_qp: - kfree(qp->s_hdr); + kfree(priv->s_hdr); + kfree(priv); +bail_qp_hdr: kfree(qp); bail_swq: vfree(swq); @@ -1210,23 +1223,24 @@ int qib_destroy_qp(struct ib_qp *ibqp) { struct qib_qp *qp = to_iqp(ibqp); struct qib_ibdev *dev = to_idev(ibqp->device); + struct qib_qp_priv *priv = qp->priv; /* Make sure HW and driver activity is stopped. 
*/ spin_lock_irq(&qp->s_lock); if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; spin_lock(&dev->pending_lock); - if (!list_empty(&qp->iowait)) - list_del_init(&qp->iowait); + if (!list_empty(&priv->iowait)) + list_del_init(&priv->iowait); spin_unlock(&dev->pending_lock); qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); spin_unlock_irq(&qp->s_lock); - cancel_work_sync(&qp->s_work); + cancel_work_sync(&priv->s_work); del_timer_sync(&qp->s_timer); - wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); - if (qp->s_tx) { - qib_put_txreq(qp->s_tx); - qp->s_tx = NULL; + wait_event(priv->wait_dma, !atomic_read(&priv->s_dma_busy)); + if (priv->s_tx) { + qib_put_txreq(priv->s_tx); + priv->s_tx = NULL; } remove_qp(dev, qp); wait_event(qp->wait, !atomic_read(&qp->refcount)); @@ -1245,7 +1259,8 @@ int qib_destroy_qp(struct ib_qp *ibqp) else vfree(qp->r_rq.wq); vfree(qp->s_wq); - kfree(qp->s_hdr); + kfree(priv->s_hdr); + kfree(priv); kfree(qp); return 0; } @@ -1368,6 +1383,7 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) { struct qib_swqe *wqe; struct qib_qp *qp = iter->qp; + struct qib_qp_priv *priv = qp->priv; wqe = get_swqe_ptr(qp, qp->s_last); seq_printf(s, @@ -1379,8 +1395,8 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) wqe->wr.opcode, qp->s_hdrwords, qp->s_flags, - atomic_read(&qp->s_dma_busy), - !list_empty(&qp->iowait), + atomic_read(&priv->s_dma_busy), + !list_empty(&priv->iowait), qp->timeout, wqe->ssn, qp->s_lsn, diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index e6b7556..1506c02 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -230,6 +230,7 @@ bail: */ int qib_make_rc_req(struct qib_qp *qp) { + struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); struct qib_other_headers *ohdr; struct qib_sge_state *ss; @@ -244,9 +245,9 @@ int qib_make_rc_req(struct qib_qp *qp) int ret = 0; int delta; - ohdr = &qp->s_hdr->u.oth; + ohdr = &priv->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr->u.l.oth; + ohdr = &priv->s_hdr->u.l.oth; /* * The lock is needed to synchronize between the sending tasklet, @@ -266,7 +267,7 @@ int qib_make_rc_req(struct qib_qp *qp) if (qp->s_last == qp->s_head) goto bail; /* If DMAs are in progress, we can't flush immediately. 
*/ - if (atomic_read(&qp->s_dma_busy)) { + if (atomic_read(&priv->s_dma_busy)) { qp->s_flags |= QIB_S_WAIT_DMA; goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 425c8c2..8985baa 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -675,6 +675,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, u32 bth0, u32 bth2) { + struct qib_qp_priv *priv = qp->priv; struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); u16 lrh0; u32 nwords; @@ -685,17 +686,18 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = QIB_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh, &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; } lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | qp->remote_ah_attr.sl << 4; - qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + priv->s_hdr->lrh[0] = cpu_to_be16(lrh0); + priv->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + priv->s_hdr->lrh[2] = + cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + priv->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | qp->remote_ah_attr.src_path_bits); bth0 |= qib_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; @@ -717,7 +719,9 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, */ void qib_do_send(struct work_struct *work) { - struct qib_qp *qp = container_of(work, struct qib_qp, s_work); + struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv, + s_work); + struct qib_qp *qp = priv->owner; struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct qib_pportdata *ppd = ppd_from_ibp(ibp); int (*make_req)(struct qib_qp *qp); @@ -756,7 +760,7 @@ void qib_do_send(struct work_struct *work) * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ - if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords, + if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) break; /* Record that s_hdr is empty. 
*/ diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index c6d6a54..ac4fcad 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -513,7 +513,9 @@ int qib_sdma_running(struct qib_pportdata *ppd) static void complete_sdma_err_req(struct qib_pportdata *ppd, struct qib_verbs_txreq *tx) { - atomic_inc(&tx->qp->s_dma_busy); + struct qib_qp_priv *priv = tx->qp->priv; + + atomic_inc(&priv->s_dma_busy); /* no sdma descriptors, so no unmap_desc */ tx->txreq.start_idx = 0; tx->txreq.next_descq_idx = 0; @@ -543,6 +545,7 @@ int qib_sdma_verbs_send(struct qib_pportdata *ppd, u64 sdmadesc[2]; u32 dwoffset; dma_addr_t addr; + struct qib_qp_priv *priv; spin_lock_irqsave(&ppd->sdma_lock, flags); @@ -644,8 +647,8 @@ retry: descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD); if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ) descqp[0] |= cpu_to_le64(SDMA_DESC_INTR); - - atomic_inc(&tx->qp->s_dma_busy); + priv = tx->qp->priv; + atomic_inc(&priv->s_dma_busy); tx->txreq.next_descq_idx = tail; ppd->dd->f_sdma_update_tail(ppd, tail); ppd->sdma_descq_added += tx->txreq.sg_count; @@ -663,6 +666,7 @@ unmap: unmap_desc(ppd, tail); } qp = tx->qp; + priv = qp->priv; qib_put_txreq(tx); spin_lock(&qp->r_lock); spin_lock(&qp->s_lock); @@ -679,6 +683,7 @@ unmap: busy: qp = tx->qp; + priv = qp->priv; spin_lock(&qp->s_lock); if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { struct qib_ibdev *dev; @@ -690,16 +695,16 @@ busy: */ tx->ss = ss; tx->dwords = dwords; - qp->s_tx = tx; + priv->s_tx = tx; dev = &ppd->dd->verbs_dev; spin_lock(&dev->pending_lock); - if (list_empty(&qp->iowait)) { + if (list_empty(&priv->iowait)) { struct qib_ibport *ibp; ibp = &ppd->ibport_data; ibp->n_dmawait++; qp->s_flags |= QIB_S_WAIT_DMA_DESC; - list_add_tail(&qp->iowait, &dev->dmawait); + list_add_tail(&priv->iowait, &dev->dmawait); } spin_unlock(&dev->pending_lock); qp->s_flags &= ~QIB_S_BUSY; diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 06a5645..d607656 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -45,6 +45,7 @@ */ int qib_make_uc_req(struct qib_qp *qp) { + struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; struct qib_swqe *wqe; unsigned long flags; @@ -63,7 +64,7 @@ int qib_make_uc_req(struct qib_qp *qp) if (qp->s_last == qp->s_head) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&qp->s_dma_busy)) { + if (atomic_read(&priv->s_dma_busy)) { qp->s_flags |= QIB_S_WAIT_DMA; goto bail; } @@ -72,9 +73,9 @@ int qib_make_uc_req(struct qib_qp *qp) goto done; } - ohdr = &qp->s_hdr->u.oth; + ohdr = &priv->s_hdr->u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr->u.l.oth; + ohdr = &priv->s_hdr->u.l.oth; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 32fc80c..682403a 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -235,6 +235,7 @@ drop: */ int qib_make_ud_req(struct qib_qp *qp) { + struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct qib_pportdata *ppd; @@ -258,7 +259,7 @@ int qib_make_ud_req(struct qib_qp *qp) if (qp->s_last == qp->s_head) goto bail; /* If DMAs are in progress, we can't flush immediately. 
*/ - if (atomic_read(&qp->s_dma_busy)) { + if (atomic_read(&priv->s_dma_busy)) { qp->s_flags |= QIB_S_WAIT_DMA; goto bail; } @@ -295,7 +296,7 @@ int qib_make_ud_req(struct qib_qp *qp) * XXX Instead of waiting, we could queue a * zero length descriptor so we get a callback. */ - if (atomic_read(&qp->s_dma_busy)) { + if (atomic_read(&priv->s_dma_busy)) { qp->s_flags |= QIB_S_WAIT_DMA; goto bail; } @@ -325,11 +326,11 @@ int qib_make_ud_req(struct qib_qp *qp) if (ah_attr->ah_flags & IB_AH_GRH) { /* Header size in 32-bit words. */ - qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, + qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh, &ah_attr->grh, qp->s_hdrwords, nwords); lrh0 = QIB_LRH_GRH; - ohdr = &qp->s_hdr->u.l.oth; + ohdr = &priv->s_hdr->u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -337,7 +338,7 @@ int qib_make_ud_req(struct qib_qp *qp) } else { /* Header size in 32-bit words. */ lrh0 = QIB_LRH_BTH; - ohdr = &qp->s_hdr->u.oth; + ohdr = &priv->s_hdr->u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -350,15 +351,16 @@ int qib_make_ud_req(struct qib_qp *qp) lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ else lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12; - qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + priv->s_hdr->lrh[0] = cpu_to_be16(lrh0); + priv->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + priv->s_hdr->lrh[2] = + cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); lid = ppd->lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); - qp->s_hdr->lrh[3] = cpu_to_be16(lid); + priv->s_hdr->lrh[3] = cpu_to_be16(lid); } else - qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE; + priv->s_hdr->lrh[3] = IB_LID_PERMISSIVE; if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 3bfa1a6..e55fc5f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -486,6 +486,7 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct qib_qp *qp = to_iqp(ibqp); + struct qib_qp_priv *priv = qp->priv; int err = 0; int scheduled = 0; @@ -499,7 +500,7 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, /* Try to do the send work in the caller's context. 
*/ if (!scheduled) - qib_do_send(&qp->s_work); + qib_do_send(&priv->s_work); bail: return err; @@ -730,12 +731,14 @@ static void mem_timer(unsigned long data) struct qib_ibdev *dev = (struct qib_ibdev *) data; struct list_head *list = &dev->memwait; struct qib_qp *qp = NULL; + struct qib_qp_priv *priv = NULL; unsigned long flags; spin_lock_irqsave(&dev->pending_lock, flags); if (!list_empty(list)) { - qp = list_entry(list->next, struct qib_qp, iowait); - list_del_init(&qp->iowait); + priv = list_entry(list->next, struct qib_qp_priv, iowait); + qp = priv->owner; + list_del_init(&priv->iowait); atomic_inc(&qp->refcount); if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); @@ -950,6 +953,7 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss, static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, struct qib_qp *qp) { + struct qib_qp_priv *priv = qp->priv; struct qib_verbs_txreq *tx; unsigned long flags; @@ -965,10 +969,10 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, tx = list_entry(l, struct qib_verbs_txreq, txreq.list); } else { if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK && - list_empty(&qp->iowait)) { + list_empty(&priv->iowait)) { dev->n_txwait++; qp->s_flags |= QIB_S_WAIT_TX; - list_add_tail(&qp->iowait, &dev->txwait); + list_add_tail(&priv->iowait, &dev->txwait); } qp->s_flags &= ~QIB_S_BUSY; spin_unlock(&dev->pending_lock); @@ -1004,6 +1008,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) { struct qib_ibdev *dev; struct qib_qp *qp; + struct qib_qp_priv *priv; unsigned long flags; qp = tx->qp; @@ -1030,8 +1035,10 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) if (!list_empty(&dev->txwait)) { /* Wake up first QP wanting a free struct */ - qp = list_entry(dev->txwait.next, struct qib_qp, iowait); - list_del_init(&qp->iowait); + priv = list_entry(dev->txwait.next, struct qib_qp_priv, + iowait); + qp = priv->owner; + list_del_init(&priv->iowait); atomic_inc(&qp->refcount); spin_unlock_irqrestore(&dev->pending_lock, flags); @@ -1057,6 +1064,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) { struct qib_qp *qp, *nqp; + struct qib_qp_priv *qpp, *nqpp; struct qib_qp *qps[20]; struct qib_ibdev *dev; unsigned i, n; @@ -1066,15 +1074,17 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) spin_lock(&dev->pending_lock); /* Search wait list for first QP wanting DMA descriptors. 
*/ - list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) { + list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) { + qp = qpp->owner; + nqp = nqpp->owner; if (qp->port_num != ppd->port) continue; if (n == ARRAY_SIZE(qps)) break; - if (qp->s_tx->txreq.sg_count > avail) + if (qpp->s_tx->txreq.sg_count > avail) break; - avail -= qp->s_tx->txreq.sg_count; - list_del_init(&qp->iowait); + avail -= qpp->s_tx->txreq.sg_count; + list_del_init(&qpp->iowait); atomic_inc(&qp->refcount); qps[n++] = qp; } @@ -1102,6 +1112,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) struct qib_verbs_txreq *tx = container_of(cookie, struct qib_verbs_txreq, txreq); struct qib_qp *qp = tx->qp; + struct qib_qp_priv *priv = qp->priv; spin_lock(&qp->s_lock); if (tx->wqe) @@ -1118,9 +1129,9 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) } qib_rc_send_complete(qp, hdr); } - if (atomic_dec_and_test(&qp->s_dma_busy)) { + if (atomic_dec_and_test(&priv->s_dma_busy)) { if (qp->state == IB_QPS_RESET) - wake_up(&qp->wait_dma); + wake_up(&priv->wait_dma); else if (qp->s_flags & QIB_S_WAIT_DMA) { qp->s_flags &= ~QIB_S_WAIT_DMA; qib_schedule_send(qp); @@ -1133,17 +1144,18 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp) { + struct qib_qp_priv *priv = qp->priv; unsigned long flags; int ret = 0; spin_lock_irqsave(&qp->s_lock, flags); if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { spin_lock(&dev->pending_lock); - if (list_empty(&qp->iowait)) { + if (list_empty(&priv->iowait)) { if (list_empty(&dev->memwait)) mod_timer(&dev->mem_timer, jiffies + 1); qp->s_flags |= QIB_S_WAIT_KMEM; - list_add_tail(&qp->iowait, &dev->memwait); + list_add_tail(&priv->iowait, &dev->memwait); } spin_unlock(&dev->pending_lock); qp->s_flags &= ~QIB_S_BUSY; @@ -1158,6 +1170,7 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr, u32 hdrwords, struct qib_sge_state *ss, u32 len, u32 plen, u32 dwords) { + struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); struct qib_devdata *dd = dd_from_dev(dev); struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); @@ -1168,9 +1181,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr, u32 ndesc; int ret; - tx = qp->s_tx; + tx = priv->s_tx; if (tx) { - qp->s_tx = NULL; + priv->s_tx = NULL; /* resend previously constructed packet */ ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx); goto bail; @@ -1260,6 +1273,7 @@ bail_tx: */ static int no_bufs_available(struct qib_qp *qp) { + struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); struct qib_devdata *dd; unsigned long flags; @@ -1274,10 +1288,10 @@ static int no_bufs_available(struct qib_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { spin_lock(&dev->pending_lock); - if (list_empty(&qp->iowait)) { + if (list_empty(&priv->iowait)) { dev->n_piowait++; qp->s_flags |= QIB_S_WAIT_PIO; - list_add_tail(&qp->iowait, &dev->piowait); + list_add_tail(&priv->iowait, &dev->piowait); dd = dd_from_dev(dev); dd->f_wantpiobuf_intr(dd, 1); } @@ -1534,6 +1548,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd) struct qib_qp *qp; unsigned long flags; unsigned i, n; + struct qib_qp_priv *priv; list = &dev->piowait; n = 0; @@ -1548,8 +1563,9 @@ void qib_ib_piobufavail(struct qib_devdata *dd) while (!list_empty(list)) { if (n == ARRAY_SIZE(qps)) goto full; - 
qp = list_entry(list->next, struct qib_qp, iowait); - list_del_init(&qp->iowait); + priv = list_entry(list->next, struct qib_qp_priv, iowait); + qp = priv->owner; + list_del_init(&priv->iowait); atomic_inc(&qp->refcount); qps[n++] = qp; } @@ -2330,11 +2346,12 @@ void qib_unregister_ib_device(struct qib_devdata *dd) */ void qib_schedule_send(struct qib_qp *qp) { + struct qib_qp_priv *priv = qp->priv; if (qib_send_ok(qp)) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - queue_work(ppd->qib_wq, &qp->s_work); + queue_work(ppd->qib_wq, &priv->s_work); } } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index f1ca5d1..a0cf23f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -413,6 +413,21 @@ struct qib_ack_entry { }; /* + * qib specific data structure that will be hidden from rvt after the queue pair + * is made common. + */ +struct qib_qp; +struct qib_qp_priv { + struct qib_ib_header *s_hdr; /* next packet header to send */ + struct list_head iowait; /* link for wait PIO buf */ + atomic_t s_dma_busy; + struct qib_verbs_txreq *s_tx; + struct work_struct s_work; + wait_queue_head_t wait_dma; + struct qib_qp *owner; +}; + +/* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). * Variables prefixed with ack_ are for responder replies. @@ -422,13 +437,13 @@ struct qib_ack_entry { */ struct qib_qp { struct ib_qp ibqp; + struct qib_qp_priv *priv; /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; struct qib_qp __rcu *next; /* link list for QPN hash table */ struct qib_swqe *s_wq; /* send work queue */ struct qib_mmap_info *ip; - struct qib_ib_header *s_hdr; /* next packet header to send */ unsigned long timeout_jiffies; /* computed from timeout */ enum ib_mtu path_mtu; @@ -486,11 +501,11 @@ struct qib_qp { spinlock_t s_lock ____cacheline_aligned_in_smp; struct qib_sge_state *s_cur_sge; u32 s_flags; - struct qib_verbs_txreq *s_tx; + struct qib_swqe *s_wqe; struct qib_sge_state s_sge; /* current send request data */ struct qib_mregion *s_rdma_mr; - atomic_t s_dma_busy; + u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ @@ -521,11 +536,6 @@ struct qib_qp { struct qib_sge_state s_ack_rdma_sge; struct timer_list s_timer; - struct list_head iowait; /* link for wait PIO buf */ - - struct work_struct s_work; - - wait_queue_head_t wait_dma; struct qib_sge r_sg_list[0] /* verified SGEs */ ____cacheline_aligned_in_smp; -- cgit v0.10.2 From 6a9df403c9d3e37b79d6dac83922d2b4647f4dc8 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:45:20 -0800 Subject: IB/qib: Add device specific info prints Implement get_card_name and get_pci_dev helper functions for rdmavt for qib. 
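These two callbacks give rdmavt a human-readable card name and the underlying PCI device to label its messages with. A hypothetical consumer on the rdmavt side (the real rvt_pr_* macro bodies are not part of this series, so names here are illustrative only) might look like:

    /* Hypothetical sketch only: how rdmavt could use the driver-provided
     * callbacks when printing a device-scoped error message. */
    struct pci_dev *pdev = rdi->driver_f.get_pci_dev(rdi);
    const char *card = rdi->driver_f.get_card_name(rdi);

    dev_err(&pdev->dev, "%s: port init failed\n", card);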
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 0e68e1f..e610eaf 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1455,6 +1455,8 @@ u64 qib_sps_ints(void); dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long, size_t, int); const char *qib_get_unit_name(int unit); +const char *qib_get_card_name(struct rvt_dev_info *rdi); +struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi); /* * Flush write combining store buffers (if present) and perform a write diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 57b19ead..ae5a725 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -90,6 +90,22 @@ const char *qib_get_unit_name(int unit) return iname; } +const char *qib_get_card_name(struct rvt_dev_info *rdi) +{ + struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = container_of(ibdev, + struct qib_devdata, verbs_dev); + return qib_get_unit_name(dd->unit); +} + +struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi) +{ + struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = container_of(ibdev, + struct qib_devdata, verbs_dev); + return dd->pcidev; +} + /* * Return count of units with at least one port ACTIVE. */ diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index e55fc5f..6d96d7a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -2244,6 +2244,8 @@ int qib_register_ib_device(struct qib_devdata *dd) * Fill in rvt info object. */ dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files; + dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; + dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.dparms.props.max_pd = ib_qib_max_pds; dd->verbs_dev.rdi.flags = (RVT_FLAG_MR_INIT_DRIVER | RVT_FLAG_QP_INIT_DRIVER | -- cgit v0.10.2 From 7c2e11fe2dbe69ba78c7a363f83474ad2c11ede7 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:45:59 -0800 Subject: IB/qib: Remove qp and mr functionality from qib Remove qp and mr support from qib and use rdmavt. These two changes cannot reasonably be split apart into separate patches because they depend on each other in multiple places. This paves the way to remove even more functions in subsequent patches.
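One detail worth calling out before the diffs: the rkey/lkey validation paths that survive this conversion locate a memory-region segment from a byte offset by dividing and taking the remainder by RVT_SEGSZ, the number of segments held by one map entry. A standalone sketch of that arithmetic for the uniform-page-size case follows; it is not taken from the patch, the value assumed for RVT_SEGSZ and the helper name seg_index() are made up for illustration.

#include <stdio.h>
#include <stddef.h>

#define RVT_SEGSZ 128			/* assumed segments per map entry */

struct seg_pos {
	unsigned m;			/* index of the map entry */
	unsigned n;			/* segment index within that map entry */
	size_t   off;			/* byte offset within the segment */
};

static struct seg_pos seg_index(size_t off, unsigned page_shift)
{
	size_t entries = off >> page_shift;	/* whole pages before 'off' */
	struct seg_pos p;

	p.off = off - (entries << page_shift);	/* remainder inside the page */
	p.m = entries / RVT_SEGSZ;
	p.n = entries % RVT_SEGSZ;
	return p;
}

int main(void)
{
	/* e.g. an offset of 1 MiB + 100 bytes into an MR built from 4 KiB pages */
	struct seg_pos p = seg_index((1 << 20) + 100, 12);

	printf("map %u, segment %u, offset %zu\n", p.m, p.n, p.off);	/* map 2, segment 0, offset 100 */
	return 0;
}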
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index 8a8f892..75140f5 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o ib_qib-y := qib_cq.o qib_diag.o qib_driver.o qib_eeprom.o \ - qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \ - qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \ + qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \ + qib_mad.o qib_mmap.o qib_pcie.o qib_pio_copy.o \ qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \ qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \ diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index e610eaf..309b6f3 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -231,7 +231,7 @@ struct qib_ctxtdata { /* ctxt rcvhdrq head offset */ u32 head; /* lookaside fields */ - struct qib_qp *lookaside_qp; + struct rvt_qp *lookaside_qp; u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; @@ -241,7 +241,7 @@ struct qib_ctxtdata { #endif }; -struct qib_sge_state; +struct rvt_sge_state; struct qib_sdma_txreq { int flags; @@ -259,14 +259,14 @@ struct qib_sdma_desc { struct qib_verbs_txreq { struct qib_sdma_txreq txreq; - struct qib_qp *qp; - struct qib_swqe *wqe; + struct rvt_qp *qp; + struct rvt_swqe *wqe; u32 dwords; u16 hdr_dwords; u16 hdr_inx; struct qib_pio_header *align_buf; - struct qib_mregion *mr; - struct qib_sge_state *ss; + struct rvt_mregion *mr; + struct rvt_sge_state *ss; }; #define QIB_SDMA_TXREQ_F_USELARGEBUF 0x1 @@ -1324,7 +1324,7 @@ void __qib_sdma_intr(struct qib_pportdata *); void qib_sdma_intr(struct qib_pportdata *); void qib_user_sdma_send_desc(struct qib_pportdata *dd, struct list_head *pktlist); -int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, +int qib_sdma_verbs_send(struct qib_pportdata *, struct rvt_sge_state *, u32, struct qib_verbs_txreq *); /* ppd->sdma_lock should be locked before calling this. 
*/ int qib_sdma_make_progress(struct qib_pportdata *dd); diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index 2b45d0b..c1ea21e 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -466,7 +466,7 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) if (cq->ip) { struct qib_ibdev *dev = to_idev(ibcq->device); - struct qib_mmap_info *ip = cq->ip; + struct rvt_mmap_info *ip = cq->ip; qib_update_mmap_info(dev, ip, sz, wc); diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index ae5a725..eafdee9 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -322,7 +322,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; struct qib_other_headers *ohdr = NULL; struct qib_ibport *ibp = &ppd->ibport_data; - struct qib_qp *qp = NULL; + struct rvt_qp *qp = NULL; u32 tlen = qib_hdrget_length_in_bytes(rhf_addr); u16 lid = be16_to_cpu(hdr->lrh[1]); int lnh = be16_to_cpu(hdr->lrh[0]) & 3; @@ -472,7 +472,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0; int last; u64 lval; - struct qib_qp *qp, *nqp; + struct rvt_qp *qp, *nqp; l = rcd->head; rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 04fa272..2c3c935 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -46,20 +46,20 @@ * */ -int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) +int qib_alloc_lkey(struct rvt_mregion *mr, int dma_region) { unsigned long flags; u32 r; u32 n; int ret = 0; struct qib_ibdev *dev = to_idev(mr->pd->device); - struct qib_lkey_table *rkt = &dev->lk_table; + struct rvt_lkey_table *rkt = &dev->lk_table; spin_lock_irqsave(&rkt->lock, flags); /* special case for dma_mr lkey == 0 */ if (dma_region) { - struct qib_mregion *tmr; + struct rvt_mregion *tmr; tmr = rcu_access_pointer(dev->dma_mr); if (!tmr) { @@ -90,8 +90,8 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) * bits are capped in qib_verbs.c to insure enough bits * for generation number */ - mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | - ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) + mr->lkey = (r << (32 - ib_rvt_lkey_table_size)) | + ((((1 << (24 - ib_rvt_lkey_table_size)) - 1) & rkt->gen) << 8); if (mr->lkey == 0) { mr->lkey |= 1 << 8; @@ -114,13 +114,13 @@ bail: * qib_free_lkey - free an lkey * @mr: mr to free from tables */ -void qib_free_lkey(struct qib_mregion *mr) +void qib_free_lkey(struct rvt_mregion *mr) { unsigned long flags; u32 lkey = mr->lkey; u32 r; struct qib_ibdev *dev = to_idev(mr->pd->device); - struct qib_lkey_table *rkt = &dev->lk_table; + struct rvt_lkey_table *rkt = &dev->lk_table; spin_lock_irqsave(&rkt->lock, flags); if (!mr->lkey_published) @@ -128,7 +128,7 @@ void qib_free_lkey(struct qib_mregion *mr) if (lkey == 0) RCU_INIT_POINTER(dev->dma_mr, NULL); else { - r = lkey >> (32 - ib_qib_lkey_table_size); + r = lkey >> (32 - ib_rvt_lkey_table_size); RCU_INIT_POINTER(rkt->table[r], NULL); } qib_put_mr(mr); @@ -138,105 +138,6 @@ out: } /** - * qib_lkey_ok - check IB SGE for validity and initialize - * @rkt: table containing lkey to check SGE against - * @pd: protection domain - * @isge: outgoing internal SGE - * @sge: SGE to check - * 
@acc: access flags - * - * Return 1 if valid and successful, otherwise returns 0. - * - * increments the reference count upon success - * - * Check the IB SGE for validity and initialize our internal version - * of it. - */ -int qib_lkey_ok(struct qib_lkey_table *rkt, struct rvt_pd *pd, - struct qib_sge *isge, struct ib_sge *sge, int acc) -{ - struct qib_mregion *mr; - unsigned n, m; - size_t off; - - /* - * We use LKEY == zero for kernel virtual addresses - * (see qib_get_dma_mr and qib_dma.c). - */ - rcu_read_lock(); - if (sge->lkey == 0) { - struct qib_ibdev *dev = to_idev(pd->ibpd.device); - - if (pd->user) - goto bail; - mr = rcu_dereference(dev->dma_mr); - if (!mr) - goto bail; - if (unlikely(!atomic_inc_not_zero(&mr->refcount))) - goto bail; - rcu_read_unlock(); - - isge->mr = mr; - isge->vaddr = (void *) sge->addr; - isge->length = sge->length; - isge->sge_length = sge->length; - isge->m = 0; - isge->n = 0; - goto ok; - } - mr = rcu_dereference( - rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]); - if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) - goto bail; - - off = sge->addr - mr->user_base; - if (unlikely(sge->addr < mr->user_base || - off + sge->length > mr->length || - (mr->access_flags & acc) != acc)) - goto bail; - if (unlikely(!atomic_inc_not_zero(&mr->refcount))) - goto bail; - rcu_read_unlock(); - - off += mr->offset; - if (mr->page_shift) { - /* - page sizes are uniform power of 2 so no loop is necessary - entries_spanned_by_off is the number of times the loop below - would have executed. - */ - size_t entries_spanned_by_off; - - entries_spanned_by_off = off >> mr->page_shift; - off -= (entries_spanned_by_off << mr->page_shift); - m = entries_spanned_by_off/QIB_SEGSZ; - n = entries_spanned_by_off%QIB_SEGSZ; - } else { - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= QIB_SEGSZ) { - m++; - n = 0; - } - } - } - isge->mr = mr; - isge->vaddr = mr->map[m]->segs[n].vaddr + off; - isge->length = mr->map[m]->segs[n].length - off; - isge->sge_length = sge->length; - isge->m = m; - isge->n = n; -ok: - return 1; -bail: - rcu_read_unlock(); - return 0; -} - -/** * qib_rkey_ok - check the IB virtual address, length, and RKEY * @qp: qp for validation * @sge: SGE state @@ -249,11 +150,11 @@ bail: * * increments the reference count upon success */ -int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, +int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc) { - struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct qib_mregion *mr; + struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; + struct rvt_mregion *mr; unsigned n, m; size_t off; @@ -285,7 +186,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, } mr = rcu_dereference( - rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]); + rkt->table[(rkey >> (32 - ib_rvt_lkey_table_size))]); if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; @@ -308,15 +209,15 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, entries_spanned_by_off = off >> mr->page_shift; off -= (entries_spanned_by_off << mr->page_shift); - m = entries_spanned_by_off/QIB_SEGSZ; - n = entries_spanned_by_off%QIB_SEGSZ; + m = entries_spanned_by_off / RVT_SEGSZ; + n = entries_spanned_by_off % RVT_SEGSZ; } else { m = 0; n = 0; while (off >= mr->map[m]->segs[n].length) { off -= mr->map[m]->segs[n].length; n++; - if (n >= QIB_SEGSZ) { + if (n >= RVT_SEGSZ) { m++; n = 0; 
} @@ -335,58 +236,3 @@ bail: return 0; } -/* - * Initialize the memory region specified by the work request. - */ -int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr) -{ - struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd); - struct qib_mr *mr = to_imr(wr->mr); - struct qib_mregion *mrg; - u32 key = wr->key; - unsigned i, n, m; - int ret = -EINVAL; - unsigned long flags; - u64 *page_list; - size_t ps; - - spin_lock_irqsave(&rkt->lock, flags); - if (pd->user || key == 0) - goto bail; - - mrg = rcu_dereference_protected( - rkt->table[(key >> (32 - ib_qib_lkey_table_size))], - lockdep_is_held(&rkt->lock)); - if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd)) - goto bail; - - if (mr->npages > mrg->max_segs) - goto bail; - - ps = mr->ibmr.page_size; - if (mr->ibmr.length > ps * mr->npages) - goto bail; - - mrg->user_base = mr->ibmr.iova; - mrg->iova = mr->ibmr.iova; - mrg->lkey = key; - mrg->length = mr->ibmr.length; - mrg->access_flags = wr->access; - page_list = mr->pages; - m = 0; - n = 0; - for (i = 0; i < mr->npages; i++) { - mrg->map[m]->segs[n].vaddr = (void *) page_list[i]; - mrg->map[m]->segs[n].length = ps; - if (++n == QIB_SEGSZ) { - m++; - n = 0; - } - } - - ret = 0; -bail: - spin_unlock_irqrestore(&rkt->lock, flags); - return ret; -} diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c index 34927b7..c32078c 100644 --- a/drivers/infiniband/hw/qib/qib_mmap.c +++ b/drivers/infiniband/hw/qib/qib_mmap.c @@ -41,12 +41,12 @@ /** * qib_release_mmap_info - free mmap info structure - * @ref: a pointer to the kref within struct qib_mmap_info + * @ref: a pointer to the kref within struct rvt_mmap_info */ void qib_release_mmap_info(struct kref *ref) { - struct qib_mmap_info *ip = - container_of(ref, struct qib_mmap_info, ref); + struct rvt_mmap_info *ip = + container_of(ref, struct rvt_mmap_info, ref); struct qib_ibdev *dev = to_idev(ip->context->device); spin_lock_irq(&dev->pending_lock); @@ -63,14 +63,14 @@ void qib_release_mmap_info(struct kref *ref) */ static void qib_vma_open(struct vm_area_struct *vma) { - struct qib_mmap_info *ip = vma->vm_private_data; + struct rvt_mmap_info *ip = vma->vm_private_data; kref_get(&ip->ref); } static void qib_vma_close(struct vm_area_struct *vma) { - struct qib_mmap_info *ip = vma->vm_private_data; + struct rvt_mmap_info *ip = vma->vm_private_data; kref_put(&ip->ref, qib_release_mmap_info); } @@ -91,7 +91,7 @@ int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) struct qib_ibdev *dev = to_idev(context->device); unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long size = vma->vm_end - vma->vm_start; - struct qib_mmap_info *ip, *pp; + struct rvt_mmap_info *ip, *pp; int ret = -EINVAL; /* @@ -128,11 +128,11 @@ done: /* * Allocate information for qib_mmap */ -struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, +struct rvt_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, struct ib_ucontext *context, void *obj) { - struct qib_mmap_info *ip; + struct rvt_mmap_info *ip; ip = kmalloc(sizeof(*ip), GFP_KERNEL); if (!ip) @@ -157,7 +157,7 @@ bail: return ip; } -void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip, +void qib_update_mmap_info(struct qib_ibdev *dev, struct rvt_mmap_info *ip, u32 size, void *obj) { size = PAGE_ALIGN(size); diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c deleted file mode 100644 index 9d84e0d..0000000 --- 
a/drivers/infiniband/hw/qib/qib_mr.c +++ /dev/null @@ -1,490 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include - -#include "qib.h" - -/* Fast memory region */ -struct qib_fmr { - struct ib_fmr ibfmr; - struct qib_mregion mr; /* must be last */ -}; - -static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct qib_fmr, ibfmr); -} - -static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd, - int count) -{ - int m, i = 0; - int rval = 0; - - m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; - for (; i < m; i++) { - mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); - if (!mr->map[i]) - goto bail; - } - mr->mapsz = m; - init_completion(&mr->comp); - /* count returning the ptr to user */ - atomic_set(&mr->refcount, 1); - mr->pd = pd; - mr->max_segs = count; -out: - return rval; -bail: - while (i) - kfree(mr->map[--i]); - rval = -ENOMEM; - goto out; -} - -static void deinit_qib_mregion(struct qib_mregion *mr) -{ - int i = mr->mapsz; - - mr->mapsz = 0; - while (i) - kfree(mr->map[--i]); -} - - -/** - * qib_get_dma_mr - get a DMA memory region - * @pd: protection domain for this memory region - * @acc: access flags - * - * Returns the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the - * struct ib_dma_mapping_ops functions (see qib_dma.c). 
- */ -struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) -{ - struct qib_mr *mr = NULL; - struct ib_mr *ret; - int rval; - - if (ibpd_to_rvtpd(pd)->user) { - ret = ERR_PTR(-EPERM); - goto bail; - } - - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - rval = init_qib_mregion(&mr->mr, pd, 0); - if (rval) { - ret = ERR_PTR(rval); - goto bail; - } - - - rval = qib_alloc_lkey(&mr->mr, 1); - if (rval) { - ret = ERR_PTR(rval); - goto bail_mregion; - } - - mr->mr.access_flags = acc; - ret = &mr->ibmr; -done: - return ret; - -bail_mregion: - deinit_qib_mregion(&mr->mr); -bail: - kfree(mr); - goto done; -} - -static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) -{ - struct qib_mr *mr; - int rval = -ENOMEM; - int m; - - /* Allocate struct plus pointers to first level page tables. */ - m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; - mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL); - if (!mr) - goto bail; - - rval = init_qib_mregion(&mr->mr, pd, count); - if (rval) - goto bail; - - rval = qib_alloc_lkey(&mr->mr, 0); - if (rval) - goto bail_mregion; - mr->ibmr.lkey = mr->mr.lkey; - mr->ibmr.rkey = mr->mr.lkey; -done: - return mr; - -bail_mregion: - deinit_qib_mregion(&mr->mr); -bail: - kfree(mr); - mr = ERR_PTR(rval); - goto done; -} - -/** - * qib_reg_user_mr - register a userspace memory region - * @pd: protection domain for this memory region - * @start: starting userspace address - * @length: length of region to register - * @mr_access_flags: access flags for this memory region - * @udata: unused by the QLogic_IB driver - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata) -{ - struct qib_mr *mr; - struct ib_umem *umem; - struct scatterlist *sg; - int n, m, entry; - struct ib_mr *ret; - - if (length == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); - if (IS_ERR(umem)) - return (void *) umem; - - n = umem->nmap; - - mr = alloc_mr(n, pd); - if (IS_ERR(mr)) { - ret = (struct ib_mr *)mr; - ib_umem_release(umem); - goto bail; - } - - mr->mr.user_base = start; - mr->mr.iova = virt_addr; - mr->mr.length = length; - mr->mr.offset = ib_umem_offset(umem); - mr->mr.access_flags = mr_access_flags; - mr->umem = umem; - - if (is_power_of_2(umem->page_size)) - mr->mr.page_shift = ilog2(umem->page_size); - m = 0; - n = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - void *vaddr; - - vaddr = page_address(sg_page(sg)); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == QIB_SEGSZ) { - m++; - n = 0; - } - } - ret = &mr->ibmr; - -bail: - return ret; -} - -/** - * qib_dereg_mr - unregister and free a memory region - * @ibmr: the memory region to free - * - * Returns 0 on success. - * - * Note that this is called to free MRs created by qib_get_dma_mr() - * or qib_reg_user_mr(). 
- */ -int qib_dereg_mr(struct ib_mr *ibmr) -{ - struct qib_mr *mr = to_imr(ibmr); - int ret = 0; - unsigned long timeout; - - kfree(mr->pages); - qib_free_lkey(&mr->mr); - - qib_put_mr(&mr->mr); /* will set completion if last */ - timeout = wait_for_completion_timeout(&mr->mr.comp, - 5 * HZ); - if (!timeout) { - qib_get_mr(&mr->mr); - ret = -EBUSY; - goto out; - } - deinit_qib_mregion(&mr->mr); - if (mr->umem) - ib_umem_release(mr->umem); - kfree(mr); -out: - return ret; -} - -/* - * Allocate a memory region usable with the - * IB_WR_REG_MR send work request. - * - * Return the memory region on success, otherwise return an errno. - */ -struct ib_mr *qib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) -{ - struct qib_mr *mr; - - if (mr_type != IB_MR_TYPE_MEM_REG) - return ERR_PTR(-EINVAL); - - mr = alloc_mr(max_num_sg, pd); - if (IS_ERR(mr)) - return (struct ib_mr *)mr; - - mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); - if (!mr->pages) - goto err; - - return &mr->ibmr; - -err: - qib_dereg_mr(&mr->ibmr); - return ERR_PTR(-ENOMEM); -} - -static int qib_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct qib_mr *mr = to_imr(ibmr); - - if (unlikely(mr->npages == mr->mr.max_segs)) - return -ENOMEM; - - mr->pages[mr->npages++] = addr; - - return 0; -} - -int qib_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents) -{ - struct qib_mr *mr = to_imr(ibmr); - - mr->npages = 0; - - return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page); -} - -/** - * qib_alloc_fmr - allocate a fast memory region - * @pd: the protection domain for this memory region - * @mr_access_flags: access flags for this memory region - * @fmr_attr: fast memory region attributes - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct qib_fmr *fmr; - int m; - struct ib_fmr *ret; - int rval = -ENOMEM; - - /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; - fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL); - if (!fmr) - goto bail; - - rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); - if (rval) - goto bail; - - /* - * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & - * rkey. - */ - rval = qib_alloc_lkey(&fmr->mr, 0); - if (rval) - goto bail_mregion; - fmr->ibfmr.rkey = fmr->mr.lkey; - fmr->ibfmr.lkey = fmr->mr.lkey; - /* - * Resources are allocated but no valid mapping (RKEY can't be - * used). - */ - fmr->mr.access_flags = mr_access_flags; - fmr->mr.max_segs = fmr_attr->max_pages; - fmr->mr.page_shift = fmr_attr->page_shift; - - ret = &fmr->ibfmr; -done: - return ret; - -bail_mregion: - deinit_qib_mregion(&fmr->mr); -bail: - kfree(fmr); - ret = ERR_PTR(rval); - goto done; -} - -/** - * qib_map_phys_fmr - set up a fast memory region - * @ibmfr: the fast memory region to set up - * @page_list: the list of pages to associate with the fast memory region - * @list_len: the number of pages to associate with the fast memory region - * @iova: the virtual address of the start of the fast memory region - * - * This may be called from interrupt context. 
- */ - -int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova) -{ - struct qib_fmr *fmr = to_ifmr(ibfmr); - struct qib_lkey_table *rkt; - unsigned long flags; - int m, n, i; - u32 ps; - int ret; - - i = atomic_read(&fmr->mr.refcount); - if (i > 2) - return -EBUSY; - - if (list_len > fmr->mr.max_segs) { - ret = -EINVAL; - goto bail; - } - rkt = &to_idev(ibfmr->device)->lk_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = iova; - fmr->mr.iova = iova; - ps = 1 << fmr->mr.page_shift; - fmr->mr.length = list_len * ps; - m = 0; - n = 0; - for (i = 0; i < list_len; i++) { - fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; - fmr->mr.map[m]->segs[n].length = ps; - if (++n == QIB_SEGSZ) { - m++; - n = 0; - } - } - spin_unlock_irqrestore(&rkt->lock, flags); - ret = 0; - -bail: - return ret; -} - -/** - * qib_unmap_fmr - unmap fast memory regions - * @fmr_list: the list of fast memory regions to unmap - * - * Returns 0 on success. - */ -int qib_unmap_fmr(struct list_head *fmr_list) -{ - struct qib_fmr *fmr; - struct qib_lkey_table *rkt; - unsigned long flags; - - list_for_each_entry(fmr, fmr_list, ibfmr.list) { - rkt = &to_idev(fmr->ibfmr.device)->lk_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - spin_unlock_irqrestore(&rkt->lock, flags); - } - return 0; -} - -/** - * qib_dealloc_fmr - deallocate a fast memory region - * @ibfmr: the fast memory region to deallocate - * - * Returns 0 on success. - */ -int qib_dealloc_fmr(struct ib_fmr *ibfmr) -{ - struct qib_fmr *fmr = to_ifmr(ibfmr); - int ret = 0; - unsigned long timeout; - - qib_free_lkey(&fmr->mr); - qib_put_mr(&fmr->mr); /* will set completion if last */ - timeout = wait_for_completion_timeout(&fmr->mr.comp, - 5 * HZ); - if (!timeout) { - qib_get_mr(&fmr->mr); - ret = -EBUSY; - goto out; - } - deinit_qib_mregion(&fmr->mr); - kfree(fmr); -out: - return ret; -} - -void mr_rcu_callback(struct rcu_head *list) -{ - struct qib_mregion *mr = container_of(list, struct qib_mregion, list); - - complete(&mr->comp); -} diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index aaa1cf9..b0f2dcf 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -221,7 +221,7 @@ static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) * Put the QP into the hash table. * The hash table holds a reference to the QP. */ -static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) +static void insert_qp(struct qib_ibdev *dev, struct rvt_qp *qp) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); unsigned long flags; @@ -246,7 +246,7 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) * Remove the QP from the table so it can't be found asynchronously by * the receive interrupt routine. 
*/ -static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) +static void remove_qp(struct qib_ibdev *dev, struct rvt_qp *qp) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); unsigned n = qpn_hash(dev, qp->ibqp.qp_num); @@ -262,8 +262,8 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) lockdep_is_held(&dev->qpt_lock)) == qp) { RCU_INIT_POINTER(ibp->qp1, NULL); } else { - struct qib_qp *q; - struct qib_qp __rcu **qpp; + struct rvt_qp *q; + struct rvt_qp __rcu **qpp; removed = 0; qpp = &dev->qp_table[n]; @@ -297,7 +297,7 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) { struct qib_ibdev *dev = &dd->verbs_dev; unsigned long flags; - struct qib_qp *qp; + struct rvt_qp *qp; unsigned n, qp_inuse = 0; for (n = 0; n < dd->num_pports; n++) { @@ -337,9 +337,9 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) * The caller is responsible for decrementing the QP reference count * when done. */ -struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) +struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) { - struct qib_qp *qp = NULL; + struct rvt_qp *qp = NULL; rcu_read_lock(); if (unlikely(qpn <= 1)) { @@ -369,7 +369,7 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) * @qp: the QP to reset * @type: the QP type */ -static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type) +static void qib_reset_qp(struct rvt_qp *qp, enum ib_qp_type type) { struct qib_qp_priv *priv = qp->priv; qp->remote_qpn = 0; @@ -417,7 +417,7 @@ static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type) qp->r_sge.num_sge = 0; } -static void clear_mr_refs(struct qib_qp *qp, int clr_sends) +static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; @@ -428,13 +428,13 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) if (clr_sends) { while (qp->s_last != qp->s_head) { - struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + struct rvt_swqe *wqe = get_swqe_ptr(qp, qp->s_last); unsigned i; for (i = 0; i < wqe->wr.num_sge; i++) { - struct qib_sge *sge = &wqe->sg_list[i]; + struct rvt_sge *sge = &wqe->sg_list[i]; - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || @@ -444,7 +444,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) qp->s_last = 0; } if (qp->s_rdma_mr) { - qib_put_mr(qp->s_rdma_mr); + rvt_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } } @@ -453,11 +453,11 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) return; for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { - struct qib_ack_entry *e = &qp->s_ack_queue[n]; + struct rvt_ack_entry *e = &qp->s_ack_queue[n]; if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && e->rdma_sge.mr) { - qib_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } } @@ -473,7 +473,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) * The QP r_lock and s_lock should be held and interrupts disabled. * If we are already in error state, just return. 
*/ -int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) +int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); @@ -503,7 +503,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) if (!(qp->s_flags & QIB_S_BUSY)) { qp->s_hdrwords = 0; if (qp->s_rdma_mr) { - qib_put_mr(qp->s_rdma_mr); + rvt_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (priv->s_tx) { @@ -530,7 +530,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) wc.status = IB_WC_WR_FLUSH_ERR; if (qp->r_rq.wq) { - struct qib_rwq *wq; + struct rvt_rwq *wq; u32 head; u32 tail; @@ -573,7 +573,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct qib_ibdev *dev = to_idev(ibqp->device); - struct qib_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct qib_qp_priv *priv = qp->priv; enum ib_qp_state cur_state, new_state; struct ib_event ev; @@ -861,7 +861,7 @@ bail: int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { - struct qib_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); attr->qp_state = qp->state; attr->cur_qp_state = attr->qp_state; @@ -914,7 +914,7 @@ int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * * Returns the AETH. */ -__be32 qib_compute_aeth(struct qib_qp *qp) +__be32 qib_compute_aeth(struct rvt_qp *qp) { u32 aeth = qp->r_msn & QIB_MSN_MASK; @@ -927,7 +927,7 @@ __be32 qib_compute_aeth(struct qib_qp *qp) } else { u32 min, max, x; u32 credits; - struct qib_rwq *wq = qp->r_rq.wq; + struct rvt_rwq *wq = qp->r_rq.wq; u32 head; u32 tail; @@ -982,9 +982,9 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct qib_qp *qp; + struct rvt_qp *qp; int err; - struct qib_swqe *swq = NULL; + struct rvt_swqe *swq = NULL; struct qib_ibdev *dev; struct qib_devdata *dd; size_t sz; @@ -1033,9 +1033,9 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: - sz = sizeof(struct qib_sge) * + sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + - sizeof(struct qib_swqe); + sizeof(struct rvt_swqe); swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz, gfp, PAGE_KERNEL); if (swq == NULL) { @@ -1080,14 +1080,14 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + - sizeof(struct qib_rwqe); + sizeof(struct rvt_rwqe); if (gfp != GFP_NOIO) qp->r_rq.wq = vmalloc_user( - sizeof(struct qib_rwq) + + sizeof(struct rvt_rwq) + qp->r_rq.size * sz); else qp->r_rq.wq = __vmalloc( - sizeof(struct qib_rwq) + + sizeof(struct rvt_rwq) + qp->r_rq.size * sz, gfp, PAGE_KERNEL); @@ -1155,7 +1155,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, goto bail_ip; } } else { - u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz; + u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; qp->ip = qib_create_mmap_info(dev, s, ibpd->uobject->context, @@ -1221,7 +1221,7 @@ bail: */ int qib_destroy_qp(struct ib_qp *ibqp) { - struct qib_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_qp_priv *priv = qp->priv; @@ -1297,7 +1297,7 @@ void qib_free_qpn_table(struct qib_qpn_table *qpt) * * The QP s_lock should be held. 
*/ -void qib_get_credit(struct qib_qp *qp, u32 aeth) +void qib_get_credit(struct rvt_qp *qp, u32 aeth) { u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK; @@ -1331,7 +1331,7 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth) struct qib_qp_iter { struct qib_ibdev *dev; - struct qib_qp *qp; + struct rvt_qp *qp; int n; }; @@ -1357,8 +1357,8 @@ int qib_qp_iter_next(struct qib_qp_iter *iter) struct qib_ibdev *dev = iter->dev; int n = iter->n; int ret = 1; - struct qib_qp *pqp = iter->qp; - struct qib_qp *qp; + struct rvt_qp *pqp = iter->qp; + struct rvt_qp *qp; for (; n < dev->qp_table_size; n++) { if (pqp) @@ -1381,8 +1381,8 @@ static const char * const qp_type_str[] = { void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) { - struct qib_swqe *wqe; - struct qib_qp *qp = iter->qp; + struct rvt_swqe *wqe; + struct rvt_qp *qp = iter->qp; struct qib_qp_priv *priv = qp->priv; wqe = get_swqe_ptr(qp, qp->s_last); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 1506c02..46e6c97 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -40,7 +40,7 @@ static void rc_timeout(unsigned long arg); -static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe, +static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { u32 len; @@ -54,7 +54,7 @@ static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe, return wqe->length - len; } -static void start_timer(struct qib_qp *qp) +static void start_timer(struct rvt_qp *qp) { qp->s_flags |= QIB_S_TIMER; qp->s_timer.function = rc_timeout; @@ -74,10 +74,10 @@ static void start_timer(struct qib_qp *qp) * Note that we are in the responder's side of the QP context. * Note the QP s_lock must be held. */ -static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, +static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, struct qib_other_headers *ohdr, u32 pmtu) { - struct qib_ack_entry *e; + struct rvt_ack_entry *e; u32 hwords; u32 len; u32 bth0; @@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; if (e->rdma_sge.mr) { - qib_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } /* FALLTHROUGH */ @@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, /* Copy SGE state in case we need to resend */ qp->s_rdma_mr = e->rdma_sge.mr; if (qp->s_rdma_mr) - qib_get_mr(qp->s_rdma_mr); + rvt_get_mr(qp->s_rdma_mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; qp->s_cur_sge = &qp->s_ack_rdma_sge; @@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, qp->s_cur_sge = &qp->s_ack_rdma_sge; qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; if (qp->s_rdma_mr) - qib_get_mr(qp->s_rdma_mr); + rvt_get_mr(qp->s_rdma_mr); len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; @@ -228,13 +228,13 @@ bail: * * Return 1 if constructed; otherwise, return 0. 
*/ -int qib_make_rc_req(struct qib_qp *qp) +int qib_make_rc_req(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); struct qib_other_headers *ohdr; - struct qib_sge_state *ss; - struct qib_swqe *wqe; + struct rvt_sge_state *ss; + struct rvt_swqe *wqe; u32 hwords; u32 len; u32 bth0; @@ -648,7 +648,7 @@ unlock: * Note that RDMA reads and atomics are handled in the * send side QP state and tasklet. */ -void qib_send_rc_ack(struct qib_qp *qp) +void qib_send_rc_ack(struct rvt_qp *qp) { struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); @@ -783,10 +783,10 @@ done: * for the given QP. * Called at interrupt level with the QP s_lock held. */ -static void reset_psn(struct qib_qp *qp, u32 psn) +static void reset_psn(struct rvt_qp *qp, u32 psn) { u32 n = qp->s_acked; - struct qib_swqe *wqe = get_swqe_ptr(qp, n); + struct rvt_swqe *wqe = get_swqe_ptr(qp, n); u32 opcode; qp->s_cur = n; @@ -868,9 +868,9 @@ done: * Back up requester to resend the last un-ACKed request. * The QP r_lock and s_lock should be held and interrupts disabled. */ -static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait) +static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait) { - struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); + struct rvt_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); struct qib_ibport *ibp; if (qp->s_retry == 0) { @@ -905,7 +905,7 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait) */ static void rc_timeout(unsigned long arg) { - struct qib_qp *qp = (struct qib_qp *)arg; + struct rvt_qp *qp = (struct rvt_qp *)arg; struct qib_ibport *ibp; unsigned long flags; @@ -928,7 +928,7 @@ static void rc_timeout(unsigned long arg) */ void qib_rc_rnr_retry(unsigned long arg) { - struct qib_qp *qp = (struct qib_qp *)arg; + struct rvt_qp *qp = (struct rvt_qp *)arg; unsigned long flags; spin_lock_irqsave(&qp->s_lock, flags); @@ -944,9 +944,9 @@ void qib_rc_rnr_retry(unsigned long arg) * Set qp->s_sending_psn to the next PSN after the given one. * This would be psn+1 except when RDMA reads are present. */ -static void reset_sending_psn(struct qib_qp *qp, u32 psn) +static void reset_sending_psn(struct rvt_qp *qp, u32 psn) { - struct qib_swqe *wqe; + struct rvt_swqe *wqe; u32 n = qp->s_last; /* Find the work request corresponding to the given PSN. */ @@ -969,10 +969,10 @@ static void reset_sending_psn(struct qib_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) +void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) { struct qib_other_headers *ohdr; - struct qib_swqe *wqe; + struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; u32 opcode; @@ -1013,9 +1013,9 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; for (i = 0; i < wqe->wr.num_sge; i++) { - struct qib_sge *sge = &wqe->sg_list[i]; + struct rvt_sge *sge = &wqe->sg_list[i]; - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); } /* Post a send completion queue entry if requested. 
*/ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1044,7 +1044,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) } } -static inline void update_last_psn(struct qib_qp *qp, u32 psn) +static inline void update_last_psn(struct rvt_qp *qp, u32 psn) { qp->s_last_psn = psn; } @@ -1054,8 +1054,8 @@ static inline void update_last_psn(struct qib_qp *qp, u32 psn) * This is similar to qib_send_complete but has to check to be sure * that the SGEs are not being referenced if the SWQE is being resent. */ -static struct qib_swqe *do_rc_completion(struct qib_qp *qp, - struct qib_swqe *wqe, +static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, + struct rvt_swqe *wqe, struct qib_ibport *ibp) { struct ib_wc wc; @@ -1069,9 +1069,9 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp, if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 || qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { for (i = 0; i < wqe->wr.num_sge; i++) { - struct qib_sge *sge = &wqe->sg_list[i]; + struct rvt_sge *sge = &wqe->sg_list[i]; - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1127,12 +1127,12 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp, * Called at interrupt level with the QP s_lock held. * Returns 1 if OK, 0 if current operation should be aborted (NAK). */ -static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode, +static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val, struct qib_ctxtdata *rcd) { struct qib_ibport *ibp; enum ib_wc_status status; - struct qib_swqe *wqe; + struct rvt_swqe *wqe; int ret = 0; u32 ack_psn; int diff; @@ -1350,10 +1350,10 @@ bail: * We have seen an out of sequence RDMA read middle or last packet. * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE. 
*/ -static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn, +static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, struct qib_ctxtdata *rcd) { - struct qib_swqe *wqe; + struct rvt_swqe *wqe; /* Remove QP from retry timer */ if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { @@ -1400,12 +1400,12 @@ static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn, static void qib_rc_rcv_resp(struct qib_ibport *ibp, struct qib_other_headers *ohdr, void *data, u32 tlen, - struct qib_qp *qp, + struct rvt_qp *qp, u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, struct qib_ctxtdata *rcd) { - struct qib_swqe *wqe; + struct rvt_swqe *wqe; struct qib_pportdata *ppd = ppd_from_ibp(ibp); enum ib_wc_status status; unsigned long flags; @@ -1624,14 +1624,14 @@ bail: */ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, void *data, - struct qib_qp *qp, + struct rvt_qp *qp, u32 opcode, u32 psn, int diff, struct qib_ctxtdata *rcd) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct qib_ack_entry *e; + struct rvt_ack_entry *e; unsigned long flags; u8 i, prev; int old_req; @@ -1733,7 +1733,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; if (e->rdma_sge.mr) { - qib_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } if (len != 0) { @@ -1741,7 +1741,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, u64 vaddr = be64_to_cpu(reth->vaddr); int ok; - ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, + ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) goto unlock_done; @@ -1819,7 +1819,7 @@ send_ack: return 0; } -void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err) +void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err) { unsigned long flags; int lastwqe; @@ -1838,7 +1838,7 @@ void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err) } } -static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n) +static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n) { unsigned next; @@ -1863,7 +1863,7 @@ static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n) * Called at interrupt level. 
*/ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct qib_qp *qp) + int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; struct qib_other_headers *ohdr; @@ -2070,7 +2070,7 @@ send_last: int ok; /* Check rkey & NAK */ - ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, + ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) goto nack_acc; @@ -2097,7 +2097,7 @@ send_last: goto send_last; case OP(RDMA_READ_REQUEST): { - struct qib_ack_entry *e; + struct rvt_ack_entry *e; u32 len; u8 next; @@ -2115,7 +2115,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - qib_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } reth = &ohdr->u.rc.reth; @@ -2126,7 +2126,7 @@ send_last: int ok; /* Check rkey & NAK */ - ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, + ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) goto nack_acc_unlck; @@ -2167,7 +2167,7 @@ send_last: case OP(COMPARE_SWAP): case OP(FETCH_ADD): { struct ib_atomic_eth *ateth; - struct qib_ack_entry *e; + struct rvt_ack_entry *e; u64 vaddr; atomic64_t *maddr; u64 sdata; @@ -2187,7 +2187,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - qib_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; @@ -2197,7 +2197,7 @@ send_last: goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ - if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc_unlck; @@ -2209,7 +2209,7 @@ send_last: (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, be64_to_cpu(ateth->compare_data), sdata); - qib_put_mr(qp->r_sge.sge.mr); + rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; e->opcode = opcode; e->sent = 0; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 8985baa..02e79a8 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -79,15 +79,15 @@ const u32 ib_qib_rnr_table[32] = { * Validate a RWQE and fill in the SGE state. * Return 1 if OK. */ -static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe) +static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) { int i, j, ret; struct ib_wc wc; - struct qib_lkey_table *rkt; + struct rvt_lkey_table *rkt; struct rvt_pd *pd; - struct qib_sge_state *ss; + struct rvt_sge_state *ss; - rkt = &to_idev(qp->ibqp.device)->lk_table; + rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); ss = &qp->r_sge; ss->sg_list = qp->r_sg_list; @@ -96,7 +96,7 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe) if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ - if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, + if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; @@ -109,9 +109,9 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe) bad_lkey: while (j) { - struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; + struct rvt_sge *sge = --j ? 
&ss->sg_list[j - 1] : &ss->sge; - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); } ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); @@ -136,13 +136,13 @@ bail: * * Can be called from interrupt level. */ -int qib_get_rwqe(struct qib_qp *qp, int wr_id_only) +int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only) { unsigned long flags; - struct qib_rq *rq; - struct qib_rwq *wq; + struct rvt_rq *rq; + struct rvt_rwq *wq; struct qib_srq *srq; - struct qib_rwqe *wqe; + struct rvt_rwqe *wqe; void (*handler)(struct ib_event *, void *); u32 tail; int ret; @@ -227,7 +227,7 @@ bail: * Switch to alternate path. * The QP s_lock should be held and interrupts disabled. */ -void qib_migrate_qp(struct qib_qp *qp) +void qib_migrate_qp(struct rvt_qp *qp) { struct ib_event ev; @@ -266,7 +266,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * The s_lock will be acquired around the qib_migrate_qp() call. */ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, - int has_grh, struct qib_qp *qp, u32 bth0) + int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; unsigned long flags; @@ -353,12 +353,12 @@ err: * receive interrupts since this is a connected protocol and all packets * will pass through here. */ -static void qib_ruc_loopback(struct qib_qp *sqp) +static void qib_ruc_loopback(struct rvt_qp *sqp) { struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); - struct qib_qp *qp; - struct qib_swqe *wqe; - struct qib_sge *sge; + struct rvt_qp *qp; + struct rvt_swqe *wqe; + struct rvt_sge *sge; unsigned long flags; struct ib_wc wc; u64 sdata; @@ -458,7 +458,7 @@ again: goto inv_err; if (wqe->length == 0) break; - if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length, + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, wqe->rdma_wr.remote_addr, wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_WRITE))) @@ -471,7 +471,7 @@ again: case IB_WR_RDMA_READ: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; - if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, + if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, wqe->rdma_wr.remote_addr, wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_READ))) @@ -489,7 +489,7 @@ again: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; - if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), wqe->atomic_wr.remote_addr, wqe->atomic_wr.rkey, IB_ACCESS_REMOTE_ATOMIC))) @@ -502,7 +502,7 @@ again: (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, sdata, wqe->atomic_wr.swap); - qib_put_mr(qp->r_sge.sge.mr); + rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -526,11 +526,11 @@ again: sge->sge_length -= len; if (sge->sge_length == 0) { if (!release) - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= QIB_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; @@ -672,7 +672,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, u32 bth0, u32 bth2) { struct qib_qp_priv *priv = qp->priv; @@ -721,10 +721,10 @@ void qib_do_send(struct work_struct *work) { struct qib_qp_priv *priv = 
container_of(work, struct qib_qp_priv, s_work); - struct qib_qp *qp = priv->owner; + struct rvt_qp *qp = priv->owner; struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - int (*make_req)(struct qib_qp *qp); + int (*make_req)(struct rvt_qp *qp); unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || @@ -772,7 +772,7 @@ void qib_do_send(struct work_struct *work) /* * This should be called with s_lock held. */ -void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, +void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; @@ -782,9 +782,9 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, return; for (i = 0; i < wqe->wr.num_sge; i++) { - struct qib_sge *sge = &wqe->sg_list[i]; + struct rvt_sge *sge = &wqe->sg_list[i]; - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index ac4fcad..1395ed0 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -533,12 +533,12 @@ static void complete_sdma_err_req(struct qib_pportdata *ppd, * 3) The SGE addresses are suitable for passing to dma_map_single(). */ int qib_sdma_verbs_send(struct qib_pportdata *ppd, - struct qib_sge_state *ss, u32 dwords, + struct rvt_sge_state *ss, u32 dwords, struct qib_verbs_txreq *tx) { unsigned long flags; - struct qib_sge *sge; - struct qib_qp *qp; + struct rvt_sge *sge; + struct rvt_qp *qp; int ret = 0; u16 tail; __le64 *descqp; @@ -624,7 +624,7 @@ retry: if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= QIB_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c index d623593..8547263 100644 --- a/drivers/infiniband/hw/qib/qib_srq.c +++ b/drivers/infiniband/hw/qib/qib_srq.c @@ -49,12 +49,12 @@ int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct qib_srq *srq = to_isrq(ibsrq); - struct qib_rwq *wq; + struct rvt_rwq *wq; unsigned long flags; int ret; for (; wr; wr = wr->next) { - struct qib_rwqe *wqe; + struct rvt_rwqe *wqe; u32 next; int i; @@ -132,8 +132,8 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, srq->rq.size = srq_init_attr->attr.max_wr + 1; srq->rq.max_sge = srq_init_attr->attr.max_sge; sz = sizeof(struct ib_sge) * srq->rq.max_sge + - sizeof(struct qib_rwqe); - srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz); + sizeof(struct rvt_rwqe); + srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz); if (!srq->rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_srq; @@ -145,7 +145,7 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, */ if (udata && udata->outlen >= sizeof(__u64)) { int err; - u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz; + u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; srq->ip = qib_create_mmap_info(dev, s, ibpd->uobject->context, @@ -213,12 +213,12 @@ int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct ib_udata *udata) { struct qib_srq *srq = to_isrq(ibsrq); - struct qib_rwq *wq; + struct rvt_rwq *wq; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { - struct qib_rwq *owq; - struct qib_rwqe *p; + struct rvt_rwq *owq; + struct rvt_rwqe *p; u32 sz, size, n, head, tail; /* Check 
that the requested sizes are below the limits. */ @@ -229,10 +229,10 @@ int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail; } - sz = sizeof(struct qib_rwqe) + + sz = sizeof(struct rvt_rwqe) + srq->rq.max_sge * sizeof(struct ib_sge); size = attr->max_wr + 1; - wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz); + wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz); if (!wq) { ret = -ENOMEM; goto bail; @@ -279,7 +279,7 @@ int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, n = 0; p = wq->wq; while (tail != head) { - struct qib_rwqe *wqe; + struct rvt_rwqe *wqe; int i; wqe = get_rwqe_ptr(&srq->rq, tail); @@ -288,7 +288,7 @@ int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, for (i = 0; i < wqe->num_sge; i++) p->sg_list[i] = wqe->sg_list[i]; n++; - p = (struct qib_rwqe *)((char *) p + sz); + p = (struct rvt_rwqe *)((char *)p + sz); if (++tail >= srq->rq.size) tail = 0; } @@ -303,9 +303,9 @@ int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, vfree(owq); if (srq->ip) { - struct qib_mmap_info *ip = srq->ip; + struct rvt_mmap_info *ip = srq->ip; struct qib_ibdev *dev = to_idev(srq->ibsrq.device); - u32 s = sizeof(struct qib_rwq) + size * sz; + u32 s = sizeof(struct rvt_rwq) + size * sz; qib_update_mmap_info(dev, ip, s, wq); diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index d607656..1ae135a 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -43,11 +43,11 @@ * * Return 1 if constructed; otherwise, return 0. */ -int qib_make_uc_req(struct qib_qp *qp) +int qib_make_uc_req(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; - struct qib_swqe *wqe; + struct rvt_swqe *wqe; unsigned long flags; u32 hwords; u32 bth0; @@ -241,7 +241,7 @@ unlock: * Called at interrupt level. */ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct qib_qp *qp) + int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_other_headers *ohdr; u32 opcode; @@ -439,7 +439,7 @@ rdma_first: int ok; /* Check rkey */ - ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, + ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) goto drop; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 682403a..1d9d037f 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -47,15 +47,15 @@ * Note that the receive interrupt handler may be calling qib_ud_rcv() * while this is being called. */ -static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) +static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) { struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); struct qib_pportdata *ppd; - struct qib_qp *qp; + struct rvt_qp *qp; struct ib_ah_attr *ah_attr; unsigned long flags; - struct qib_sge_state ssge; - struct qib_sge *sge; + struct rvt_sge_state ssge; + struct rvt_sge *sge; struct ib_wc wc; u32 length; enum ib_qp_type sqptype, dqptype; @@ -190,7 +190,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) if (--ssge.num_sge) *sge = *ssge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= QIB_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; @@ -233,14 +233,14 @@ drop: * * Return 1 if constructed; otherwise, return 0. 
*/ -int qib_make_ud_req(struct qib_qp *qp) +int qib_make_ud_req(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct qib_pportdata *ppd; struct qib_ibport *ibp; - struct qib_swqe *wqe; + struct rvt_swqe *wqe; unsigned long flags; u32 nwords; u32 extra_bytes; @@ -429,7 +429,7 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey) * Called at interrupt level. */ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct qib_qp *qp) + int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_other_headers *ohdr; int opcode; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 6d96d7a..5c0e76c 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -50,8 +50,8 @@ static unsigned int ib_qib_qp_table_size = 256; module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); -unsigned int ib_qib_lkey_table_size = 16; -module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint, +static unsigned int qib_lkey_table_size = 16; +module_param_named(lkey_table_size, qib_lkey_table_size, uint, S_IRUGO); MODULE_PARM_DESC(lkey_table_size, "LKEY table size in bits (2^n, 1 <= n <= 23)"); @@ -167,9 +167,9 @@ __be64 ib_qib_sys_image_guid; * @data: the data to copy * @length: the length of the data */ -void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) +void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release) { - struct qib_sge *sge = &ss->sge; + struct rvt_sge *sge = &ss->sge; while (length) { u32 len = sge->length; @@ -185,11 +185,11 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= QIB_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; @@ -209,9 +209,9 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) * @ss: the SGE state * @length: the number of bytes to skip */ -void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) +void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release) { - struct qib_sge *sge = &ss->sge; + struct rvt_sge *sge = &ss->sge; while (length) { u32 len = sge->length; @@ -226,11 +226,11 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= QIB_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; @@ -249,10 +249,10 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) * Don't modify the qib_sge_state to get the count. * Return zero if any of the segments is not aligned. 
*/ -static u32 qib_count_sge(struct qib_sge_state *ss, u32 length) +static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length) { - struct qib_sge *sg_list = ss->sg_list; - struct qib_sge sge = ss->sge; + struct rvt_sge *sg_list = ss->sg_list; + struct rvt_sge sge = ss->sge; u8 num_sge = ss->num_sge; u32 ndesc = 1; /* count the header */ @@ -277,7 +277,7 @@ static u32 qib_count_sge(struct qib_sge_state *ss, u32 length) if (--num_sge) sge = *sg_list++; } else if (sge.length == 0 && sge.mr->lkey) { - if (++sge.n >= QIB_SEGSZ) { + if (++sge.n >= RVT_SEGSZ) { if (++sge.m >= sge.mr->mapsz) break; sge.n = 0; @@ -295,9 +295,9 @@ static u32 qib_count_sge(struct qib_sge_state *ss, u32 length) /* * Copy from the SGEs to the data buffer. */ -static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) +static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) { - struct qib_sge *sge = &ss->sge; + struct rvt_sge *sge = &ss->sge; while (length) { u32 len = sge->length; @@ -315,7 +315,7 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= QIB_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; @@ -335,17 +335,17 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) * @qp: the QP to post on * @wr: the work request to send */ -static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, - int *scheduled) +static int qib_post_one_send(struct rvt_qp *qp, struct ib_send_wr *wr, + int *scheduled) { - struct qib_swqe *wqe; + struct rvt_swqe *wqe; u32 next; int i; int j; int acc; int ret; unsigned long flags; - struct qib_lkey_table *rkt; + struct rvt_lkey_table *rkt; struct rvt_pd *pd; int avoid_schedule = 0; @@ -364,10 +364,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, * undefined operations. * Make sure buffer is large enough to hold the result for atomics. 
*/ - if (wr->opcode == IB_WR_REG_MR) { - if (qib_reg_mr(qp, reg_wr(wr))) - goto bail_inval; - } else if (qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_UC) { if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) goto bail_inval; } else if (qp->ibqp.qp_type != IB_QPT_RC) { @@ -396,7 +393,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, goto bail; } - rkt = &to_idev(qp->ibqp.device)->lk_table; + rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); @@ -427,7 +424,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, if (length == 0) continue; - ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j], + ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], &wr->sg_list[i], acc); if (!ok) goto bail_inval_free; @@ -457,9 +454,9 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, bail_inval_free: while (j) { - struct qib_sge *sge = &wqe->sg_list[--j]; + struct rvt_sge *sge = &wqe->sg_list[--j]; - qib_put_mr(sge->mr); + rvt_put_mr(sge->mr); } bail_inval: ret = -EINVAL; @@ -485,7 +482,7 @@ bail: static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { - struct qib_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct qib_qp_priv *priv = qp->priv; int err = 0; int scheduled = 0; @@ -517,8 +514,8 @@ bail: static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - struct qib_qp *qp = to_iqp(ibqp); - struct qib_rwq *wq = qp->r_rq.wq; + struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_rwq *wq = qp->r_rq.wq; unsigned long flags; int ret; @@ -530,7 +527,7 @@ static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } for (; wr; wr = wr->next) { - struct qib_rwqe *wqe; + struct rvt_rwqe *wqe; u32 next; int i; @@ -581,7 +578,7 @@ bail: * Called at interrupt level. 
*/ static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct qib_qp *qp) + int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; @@ -635,7 +632,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) struct qib_ibport *ibp = &ppd->ibport_data; struct qib_ib_header *hdr = rhdr; struct qib_other_headers *ohdr; - struct qib_qp *qp; + struct rvt_qp *qp; u32 qp_num; int lnh; u8 opcode; @@ -730,7 +727,7 @@ static void mem_timer(unsigned long data) { struct qib_ibdev *dev = (struct qib_ibdev *) data; struct list_head *list = &dev->memwait; - struct qib_qp *qp = NULL; + struct rvt_qp *qp = NULL; struct qib_qp_priv *priv = NULL; unsigned long flags; @@ -757,9 +754,9 @@ static void mem_timer(unsigned long data) } } -static void update_sge(struct qib_sge_state *ss, u32 length) +static void update_sge(struct rvt_sge_state *ss, u32 length) { - struct qib_sge *sge = &ss->sge; + struct rvt_sge *sge = &ss->sge; sge->vaddr += length; sge->length -= length; @@ -768,7 +765,7 @@ static void update_sge(struct qib_sge_state *ss, u32 length) if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= QIB_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) return; sge->n = 0; @@ -814,7 +811,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) } #endif -static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss, +static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, u32 length, unsigned flush_wc) { u32 extra = 0; @@ -951,7 +948,7 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss, } static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, - struct qib_qp *qp) + struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; struct qib_verbs_txreq *tx; @@ -983,7 +980,7 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, } static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, - struct qib_qp *qp) + struct rvt_qp *qp) { struct qib_verbs_txreq *tx; unsigned long flags; @@ -1007,7 +1004,7 @@ static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, void qib_put_txreq(struct qib_verbs_txreq *tx) { struct qib_ibdev *dev; - struct qib_qp *qp; + struct rvt_qp *qp; struct qib_qp_priv *priv; unsigned long flags; @@ -1017,7 +1014,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); if (tx->mr) { - qib_put_mr(tx->mr); + rvt_put_mr(tx->mr); tx->mr = NULL; } if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { @@ -1063,9 +1060,9 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) */ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) { - struct qib_qp *qp, *nqp; + struct rvt_qp *qp, *nqp; struct qib_qp_priv *qpp, *nqpp; - struct qib_qp *qps[20]; + struct rvt_qp *qps[20]; struct qib_ibdev *dev; unsigned i, n; @@ -1111,7 +1108,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) { struct qib_verbs_txreq *tx = container_of(cookie, struct qib_verbs_txreq, txreq); - struct qib_qp *qp = tx->qp; + struct rvt_qp *qp = tx->qp; struct qib_qp_priv *priv = qp->priv; spin_lock(&qp->s_lock); @@ -1142,7 +1139,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) qib_put_txreq(tx); } -static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp) +static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) 
{ struct qib_qp_priv *priv = qp->priv; unsigned long flags; @@ -1166,8 +1163,8 @@ static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp) return ret; } -static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr, - u32 hdrwords, struct qib_sge_state *ss, u32 len, +static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { struct qib_qp_priv *priv = qp->priv; @@ -1271,7 +1268,7 @@ bail_tx: * If we are now in the error state, return zero to flush the * send work request. */ -static int no_bufs_available(struct qib_qp *qp) +static int no_bufs_available(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); @@ -1303,8 +1300,8 @@ static int no_bufs_available(struct qib_qp *qp) return ret; } -static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr, - u32 hdrwords, struct qib_sge_state *ss, u32 len, +static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr, + u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); @@ -1385,7 +1382,7 @@ done: } qib_sendbuf_done(dd, pbufn); if (qp->s_rdma_mr) { - qib_put_mr(qp->s_rdma_mr); + rvt_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_wqe) { @@ -1411,8 +1408,8 @@ done: * Return zero if packet is sent or queued OK. * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise. */ -int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr, - u32 hdrwords, struct qib_sge_state *ss, u32 len) +int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct rvt_sge_state *ss, u32 len) { struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); u32 plen; @@ -1544,8 +1541,8 @@ void qib_ib_piobufavail(struct qib_devdata *dd) { struct qib_ibdev *dev = &dd->verbs_dev; struct list_head *list; - struct qib_qp *qps[5]; - struct qib_qp *qp; + struct rvt_qp *qps[5]; + struct rvt_qp *qp; unsigned long flags; unsigned i, n; struct qib_qp_priv *priv; @@ -1617,8 +1614,8 @@ static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *prop props->max_cq = ib_qib_max_cqs; props->max_ah = ib_qib_max_ahs; props->max_cqe = ib_qib_max_cqes; - props->max_mr = dev->lk_table.max; - props->max_fmr = dev->lk_table.max; + props->max_mr = dev->rdi.lkey_table.max; + props->max_fmr = dev->rdi.lkey_table.max; props->max_map_per_fmr = 32767; props->max_pd = dev->rdi.dparms.props.max_pd; props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; @@ -1848,7 +1845,7 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) { struct ib_ah_attr attr; struct ib_ah *ah = ERR_PTR(-EINVAL); - struct qib_qp *qp0; + struct rvt_qp *qp0; memset(&attr, 0, sizeof(attr)); attr.dlid = dlid; @@ -2055,7 +2052,7 @@ int qib_register_ib_device(struct qib_devdata *dd) struct qib_ibdev *dev = &dd->verbs_dev; struct ib_device *ibdev = &dev->rdi.ibdev; struct qib_pportdata *ppd = dd->pport; - unsigned i, lk_tab_size; + unsigned i; int ret; dev->qp_table_size = ib_qib_qp_table_size; @@ -2087,29 +2084,6 @@ int qib_register_ib_device(struct qib_devdata *dd) qib_init_qpn_table(dd, &dev->qpn_table); - /* - * The top ib_qib_lkey_table_size bits are used to index the - * table. The lower 8 bits can be owned by the user (copied from - * the LKEY). The remaining bits act as a generation number or tag. 
- */ - spin_lock_init(&dev->lk_table.lock); - /* insure generation is at least 4 bits see keys.c */ - if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) { - qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n", - ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS); - ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS; - } - dev->lk_table.max = 1 << ib_qib_lkey_table_size; - lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - dev->lk_table.table = (struct qib_mregion __rcu **) - vmalloc(lk_tab_size); - if (dev->lk_table.table == NULL) { - ret = -ENOMEM; - goto err_lk; - } - RCU_INIT_POINTER(dev->dma_mr, NULL); - for (i = 0; i < dev->lk_table.max; i++) - RCU_INIT_POINTER(dev->lk_table.table[i], NULL); INIT_LIST_HEAD(&dev->pending_mmaps); spin_lock_init(&dev->pending_lock); dev->mmap_offset = PAGE_SIZE; @@ -2221,15 +2195,15 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->resize_cq = qib_resize_cq; ibdev->poll_cq = qib_poll_cq; ibdev->req_notify_cq = qib_req_notify_cq; - ibdev->get_dma_mr = qib_get_dma_mr; - ibdev->reg_user_mr = qib_reg_user_mr; - ibdev->dereg_mr = qib_dereg_mr; - ibdev->alloc_mr = qib_alloc_mr; - ibdev->map_mr_sg = qib_map_mr_sg; - ibdev->alloc_fmr = qib_alloc_fmr; - ibdev->map_phys_fmr = qib_map_phys_fmr; - ibdev->unmap_fmr = qib_unmap_fmr; - ibdev->dealloc_fmr = qib_dealloc_fmr; + ibdev->get_dma_mr = NULL; + ibdev->reg_user_mr = NULL; + ibdev->dereg_mr = NULL; + ibdev->alloc_mr = NULL; + ibdev->map_mr_sg = NULL; + ibdev->alloc_fmr = NULL; + ibdev->map_phys_fmr = NULL; + ibdev->unmap_fmr = NULL; + ibdev->dealloc_fmr = NULL; ibdev->attach_mcast = qib_multicast_attach; ibdev->detach_mcast = qib_multicast_detach; ibdev->process_mad = qib_process_mad; @@ -2247,10 +2221,9 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.dparms.props.max_pd = ib_qib_max_pds; - dd->verbs_dev.rdi.flags = (RVT_FLAG_MR_INIT_DRIVER | - RVT_FLAG_QP_INIT_DRIVER | + dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | RVT_FLAG_CQ_INIT_DRIVER); - + dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) @@ -2286,8 +2259,6 @@ err_tx: sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); err_hdrs: - vfree(dev->lk_table.table); -err_lk: kfree(dev->qp_table); err_qpt: qib_dev_err(dd, "cannot register verbs: %d!\n", -ret); @@ -2299,7 +2270,6 @@ void qib_unregister_ib_device(struct qib_devdata *dd) { struct qib_ibdev *dev = &dd->verbs_dev; u32 qps_inuse; - unsigned lk_tab_size; qib_verbs_unregister_sysfs(dd); @@ -2315,8 +2285,6 @@ void qib_unregister_ib_device(struct qib_devdata *dd) qib_dev_err(dd, "txwait list not empty!\n"); if (!list_empty(&dev->memwait)) qib_dev_err(dd, "memwait list not empty!\n"); - if (dev->dma_mr) - qib_dev_err(dd, "DMA MR not NULL!\n"); qps_inuse = qib_free_all_qps(dd); if (qps_inuse) @@ -2338,15 +2306,13 @@ void qib_unregister_ib_device(struct qib_devdata *dd) dd->pport->sdma_descq_cnt * sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); - lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - vfree(dev->lk_table.table); kfree(dev->qp_table); } /* * This must be called with s_lock held. 
*/ -void qib_schedule_send(struct qib_qp *qp) +void qib_schedule_send(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; if (qib_send_ok(qp)) { diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a0cf23f..c7399ff 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -210,7 +210,7 @@ struct qib_pio_header { */ struct qib_mcast_qp { struct list_head list; - struct qib_qp *qp; + struct rvt_qp *qp; }; struct qib_mcast { @@ -230,20 +230,6 @@ struct qib_ah { }; /* - * This structure is used by qib_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct qib_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -/* * This structure is used to contain the head pointer, tail pointer, * and completion queue entries as a single memory allocation so * it can be mmap'ed into user space. @@ -269,154 +255,21 @@ struct qib_cq { u8 notify; u8 triggered; struct qib_cq_wc *queue; - struct qib_mmap_info *ip; -}; - -/* - * A segment is a linear region of low physical memory. - * XXX Maybe we should use phys addr here and kmap()/kunmap(). - * Used by the verbs layer. - */ -struct qib_seg { - void *vaddr; - size_t length; -}; - -/* The number of qib_segs that fit in a page. */ -#define QIB_SEGSZ (PAGE_SIZE / sizeof(struct qib_seg)) - -struct qib_segarray { - struct qib_seg segs[QIB_SEGSZ]; -}; - -struct qib_mregion { - struct ib_pd *pd; /* shares refcnt of ibmr.pd */ - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of qib_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - u8 page_shift; /* 0 - non unform/non powerof2 sizes */ - u8 lkey_published; /* in global table */ - struct completion comp; /* complete when refcount goes to zero */ - struct rcu_head list; - atomic_t refcount; - struct qib_segarray *map[0]; /* the segments */ -}; - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct qib_sge { - struct qib_mregion *mr; - void *vaddr; /* kernel virtual address of segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -/* Memory region */ -struct qib_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - u64 *pages; - u32 npages; - struct qib_mregion mr; /* must be last */ -}; - -/* - * Send work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->s_max_sge. - */ -struct qib_swqe { - union { - struct ib_send_wr wr; /* don't use wr.sg_list */ - struct ib_ud_wr ud_wr; - struct ib_reg_wr reg_wr; - struct ib_rdma_wr rdma_wr; - struct ib_atomic_wr atomic_wr; - }; - u32 psn; /* first packet sequence number */ - u32 lpsn; /* last packet sequence number */ - u32 ssn; /* send sequence number */ - u32 length; /* total length of data in sg_list */ - struct qib_sge sg_list[0]; -}; - -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). 
- */ -struct qib_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct qib_rwq { - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ - struct qib_rwqe wq[0]; -}; - -struct qib_rq { - struct qib_rwq *wq; - u32 size; /* size of RWQE array */ - u8 max_sge; - spinlock_t lock /* protect changes in this struct */ - ____cacheline_aligned_in_smp; + struct rvt_mmap_info *ip; }; struct qib_srq { struct ib_srq ibsrq; - struct qib_rq rq; - struct qib_mmap_info *ip; + struct rvt_rq rq; + struct rvt_mmap_info *ip; /* send signal when number of RWQEs < limit */ u32 limit; }; -struct qib_sge_state { - struct qib_sge *sg_list; /* next SGE to be used if any */ - struct qib_sge sge; /* progress state for the current SGE */ - u32 total_len; - u8 num_sge; -}; - -/* - * This structure holds the information that the send tasklet needs - * to send a RDMA read response or atomic operation. - */ -struct qib_ack_entry { - u8 opcode; - u8 sent; - u32 psn; - u32 lpsn; - union { - struct qib_sge rdma_sge; - u64 atomic_data; - }; -}; - /* * qib specific data structure that will be hidden from rvt after the queue pair * is made common. */ -struct qib_qp; struct qib_qp_priv { struct qib_ib_header *s_hdr; /* next packet header to send */ struct list_head iowait; /* link for wait PIO buf */ @@ -424,121 +277,7 @@ struct qib_qp_priv { struct qib_verbs_txreq *s_tx; struct work_struct s_work; wait_queue_head_t wait_dma; - struct qib_qp *owner; -}; - -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. - * - * Common variables are protected by both r_rq.lock and s_lock in that order - * which only happens in modify_qp() or changing the QP 'state'. 
- */ -struct qib_qp { - struct ib_qp ibqp; - struct qib_qp_priv *priv; - /* read mostly fields above and below */ - struct ib_ah_attr remote_ah_attr; - struct ib_ah_attr alt_ah_attr; - struct qib_qp __rcu *next; /* link list for QPN hash table */ - struct qib_swqe *s_wq; /* send work queue */ - struct qib_mmap_info *ip; - unsigned long timeout_jiffies; /* computed from timeout */ - - enum ib_mtu path_mtu; - u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu */ - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - - u8 state; /* QP state */ - u8 qp_access_flags; - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 timeout; /* Timeout for this QP */ - u8 s_srate; - u8 s_mig_state; - u8 port_num; - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_draining; - - /* start of read/write fields */ - - atomic_t refcount ____cacheline_aligned_in_smp; - wait_queue_head_t wait; - - - struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; - struct qib_sge_state s_rdma_read_sge; - - spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ - unsigned long r_aflags; - u64 r_wr_id; /* ID for current receive WQE */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ - - u8 r_state; /* opcode of last packet received */ - u8 r_flags; - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - - struct list_head rspwait; /* link for waititing to respond */ - - struct qib_sge_state r_sge; /* current receive data */ - struct qib_rq r_rq; /* receive work queue */ - - spinlock_t s_lock ____cacheline_aligned_in_smp; - struct qib_sge_state *s_cur_sge; - u32 s_flags; - - struct qib_swqe *s_wqe; - struct qib_sge_state s_sge; /* current send request data */ - struct qib_mregion *s_rdma_mr; - - u32 s_cur_size; /* size of send packet in bytes */ - u32 s_len; /* total length of s_sge */ - u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ - u32 s_last_psn; /* last response PSN processed */ - u32 s_sending_psn; /* lowest PSN that is being sent */ - u32 s_sending_hpsn; /* highest PSN that is being sent */ - u32 s_psn; /* current packet sequence number */ - u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ - u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_acked; /* last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ - u16 s_rdma_ack_cnt; - u8 s_state; /* opcode of last packet sent */ - u8 s_ack_state; /* opcode of packet to ACK */ - u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_nak_state; /* non-zero if NAK is pending */ - u8 s_retry; /* 
requester retry counter */ - u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ - u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - - struct qib_sge_state s_ack_rdma_sge; - struct timer_list s_timer; - - struct qib_sge r_sg_list[0] /* verified SGEs */ - ____cacheline_aligned_in_smp; + struct rvt_qp *owner; }; /* @@ -616,27 +355,27 @@ struct qib_qp { #define QIB_PSN_CREDIT 16 /* - * Since struct qib_swqe is not a fixed size, we can't simply index into - * struct qib_qp.s_wq. This function does the array index computation. + * Since struct rvt_swqe is not a fixed size, we can't simply index into + * struct rvt_qp.s_wq. This function does the array index computation. */ -static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp, - unsigned n) +static inline struct rvt_swqe *get_swqe_ptr(struct rvt_qp *qp, + unsigned n) { - return (struct qib_swqe *)((char *)qp->s_wq + - (sizeof(struct qib_swqe) + + return (struct rvt_swqe *)((char *)qp->s_wq + + (sizeof(struct rvt_swqe) + qp->s_max_sge * - sizeof(struct qib_sge)) * n); + sizeof(struct rvt_sge)) * n); } /* - * Since struct qib_rwqe is not a fixed size, we can't simply index into - * struct qib_rwq.wq. This function does the array index computation. + * Since struct rvt_rwqe is not a fixed size, we can't simply index into + * struct rvt_rwq.wq. This function does the array index computation. */ -static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n) +static inline struct rvt_rwqe *get_rwqe_ptr(struct rvt_rq *rq, unsigned n) { - return (struct qib_rwqe *) + return (struct rvt_rwqe *) ((char *) rq->wq->wq + - (sizeof(struct qib_rwqe) + + (sizeof(struct rvt_rwqe) + rq->max_sge * sizeof(struct ib_sge)) * n); } @@ -660,16 +399,6 @@ struct qib_qpn_table { struct qpn_map map[QPNMAP_ENTRIES]; }; -#define MAX_LKEY_TABLE_BITS 23 - -struct qib_lkey_table { - spinlock_t lock; /* protect changes in this struct */ - u32 next; /* next unused index (speeds search) */ - u32 gen; /* generation count */ - u32 max; /* size of the table */ - struct qib_mregion __rcu **table; -}; - struct qib_opcode_stats { u64 n_packets; /* number of packets */ u64 n_bytes; /* total number of bytes */ @@ -687,8 +416,8 @@ struct qib_pma_counters { }; struct qib_ibport { - struct qib_qp __rcu *qp0; - struct qib_qp __rcu *qp1; + struct rvt_qp __rcu *qp0; + struct rvt_qp __rcu *qp1; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ struct qib_ah *sm_ah; struct qib_ah *smi_ah; @@ -761,18 +490,16 @@ struct qib_ibdev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; - struct qib_mregion __rcu *dma_mr; /* QP numbers are shared by all IB ports */ struct qib_qpn_table qpn_table; - struct qib_lkey_table lk_table; struct list_head piowait; /* list for wait PIO buf */ struct list_head dmawait; /* list for wait DMA */ struct list_head txwait; /* list for wait qib_verbs_txreq */ struct list_head memwait; /* list for wait kernel memory */ struct list_head txreq_free; struct timer_list mem_timer; - struct qib_qp __rcu **qp_table; + struct rvt_qp __rcu **qp_table; struct qib_pio_header *pio_hdrs; dma_addr_t pio_hdrs_phys; /* list of QPs waiting for RNR timer */ @@ -818,11 +545,6 @@ struct qib_verbs_counters { u32 vl15_dropped; }; -static inline struct qib_mr *to_imr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct qib_mr, ibmr); -} - static inline struct qib_ah *to_iah(struct ib_ah *ibah) { return container_of(ibah, struct qib_ah, 
ibah); @@ -838,9 +560,9 @@ static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct qib_srq, ibsrq); } -static inline struct qib_qp *to_iqp(struct ib_qp *ibqp) +static inline struct rvt_qp *to_iqp(struct ib_qp *ibqp) { - return container_of(ibqp, struct qib_qp, ibqp); + return container_of(ibqp, struct rvt_qp, ibqp); } static inline struct qib_ibdev *to_idev(struct ib_device *ibdev) @@ -855,7 +577,7 @@ static inline struct qib_ibdev *to_idev(struct ib_device *ibdev) * Send if not busy or waiting for I/O and either * a RC response is pending or we can process send work requests. */ -static inline int qib_send_ok(struct qib_qp *qp) +static inline int qib_send_ok(struct rvt_qp *qp) { return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) && (qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) || @@ -865,7 +587,7 @@ static inline int qib_send_ok(struct qib_qp *qp) /* * This must be called with s_lock held. */ -void qib_schedule_send(struct qib_qp *qp); +void qib_schedule_send(struct rvt_qp *qp); static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) { @@ -916,9 +638,9 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int qib_mcast_tree_empty(struct qib_ibport *ibp); -__be32 qib_compute_aeth(struct qib_qp *qp); +__be32 qib_compute_aeth(struct rvt_qp *qp); -struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn); +struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn); struct ib_qp *qib_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, @@ -926,7 +648,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, int qib_destroy_qp(struct ib_qp *ibqp); -int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err); +int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err); int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); @@ -952,7 +674,7 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter); #endif -void qib_get_credit(struct qib_qp *qp, u32 aeth); +void qib_get_credit(struct rvt_qp *qp, u32 aeth); unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); @@ -960,19 +682,19 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail); void qib_put_txreq(struct qib_verbs_txreq *tx); -int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr, - u32 hdrwords, struct qib_sge_state *ss, u32 len); +int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct rvt_sge_state *ss, u32 len); -void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, +void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release); -void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release); +void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release); void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct qib_qp *qp); + int has_grh, void *data, u32 tlen, struct rvt_qp *qp); void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct qib_qp *qp); + int has_grh, void *data, u32 tlen, struct rvt_qp *qp); int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); @@ -980,24 +702,14 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); void qib_rc_rnr_retry(unsigned long arg); -void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr); +void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr); -void qib_rc_error(struct 
qib_qp *qp, enum ib_wc_status err); +void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err); -int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr); +int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct qib_qp *qp); - -int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); - -void qib_free_lkey(struct qib_mregion *mr); - -int qib_lkey_ok(struct qib_lkey_table *rkt, struct rvt_pd *pd, - struct qib_sge *isge, struct ib_sge *sge, int acc); - -int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, - u32 len, u64 vaddr, u32 rkey, int acc); + int has_grh, void *data, u32 tlen, struct rvt_qp *qp); int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); @@ -1033,93 +745,53 @@ int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); -struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc); - -struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata); - -int qib_dereg_mr(struct ib_mr *ibmr); - -struct ib_mr *qib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_entries); - -int qib_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents); - -int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr); - -struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova); - -int qib_unmap_fmr(struct list_head *fmr_list); - -int qib_dealloc_fmr(struct ib_fmr *ibfmr); - -static inline void qib_get_mr(struct qib_mregion *mr) -{ - atomic_inc(&mr->refcount); -} - void mr_rcu_callback(struct rcu_head *list); -static inline void qib_put_mr(struct qib_mregion *mr) -{ - if (unlikely(atomic_dec_and_test(&mr->refcount))) - call_rcu(&mr->list, mr_rcu_callback); -} - -static inline void qib_put_ss(struct qib_sge_state *ss) +static inline void qib_put_ss(struct rvt_sge_state *ss) { while (ss->num_sge) { - qib_put_mr(ss->sge.mr); + rvt_put_mr(ss->sge.mr); if (--ss->num_sge) ss->sge = *ss->sg_list++; } } - void qib_release_mmap_info(struct kref *ref); -struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, +struct rvt_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, struct ib_ucontext *context, void *obj); -void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip, +void qib_update_mmap_info(struct qib_ibdev *dev, struct rvt_mmap_info *ip, u32 size, void *obj); int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -int qib_get_rwqe(struct qib_qp *qp, int wr_id_only); +int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); -void qib_migrate_qp(struct qib_qp *qp); +void qib_migrate_qp(struct rvt_qp *qp); int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, - int has_grh, struct qib_qp *qp, u32 bth0); + int has_grh, struct rvt_qp *qp, u32 bth0); u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, u32 bth0, u32 bth2); void qib_do_send(struct work_struct *work); -void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, 
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); -void qib_send_rc_ack(struct qib_qp *qp); +void qib_send_rc_ack(struct rvt_qp *qp); -int qib_make_rc_req(struct qib_qp *qp); +int qib_make_rc_req(struct rvt_qp *qp); -int qib_make_uc_req(struct qib_qp *qp); +int qib_make_uc_req(struct rvt_qp *qp); -int qib_make_ud_req(struct qib_qp *qp); +int qib_make_ud_req(struct rvt_qp *qp); int qib_register_ib_device(struct qib_devdata *); @@ -1157,7 +829,7 @@ extern const int ib_qib_state_ops[]; extern __be64 ib_qib_sys_image_guid; /* in network order */ -extern unsigned int ib_qib_lkey_table_size; +extern unsigned int ib_rvt_lkey_table_size; extern unsigned int ib_qib_max_cqes; diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c index b2fb528..1c7af03 100644 --- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -39,7 +39,7 @@ * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct * @qp: the QP to link */ -static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp) +static struct qib_mcast_qp *qib_mcast_qp_alloc(struct rvt_qp *qp) { struct qib_mcast_qp *mqp; @@ -56,7 +56,7 @@ bail: static void qib_mcast_qp_free(struct qib_mcast_qp *mqp) { - struct qib_qp *qp = mqp->qp; + struct rvt_qp *qp = mqp->qp; /* Notify qib_destroy_qp() if it is waiting. */ if (atomic_dec_and_test(&qp->refcount)) @@ -224,7 +224,7 @@ bail: int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - struct qib_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp; struct qib_mcast *mcast; @@ -282,7 +282,7 @@ bail: int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - struct qib_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct qib_mcast *mcast = NULL; -- cgit v0.10.2 From 96ab1ac13f444e72d353fdd94b2cfacaaaef9de9 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:46:07 -0800 Subject: IB/qib: Use address handle in rdmavt and remove from qib Original patch from Kamal Heib , split apart from original. Remove AH from qib and use rdmavt version. 
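For context, the conversion helper this patch switches to (ibah_to_rvtah(), used throughout the hunks below) plays the same role as the to_iah() helper being deleted. A minimal sketch, assuming the rvt_ah layout implied by the usage in this patch (the authoritative definition lives in rdmavt's headers):

struct rvt_ah {
	struct ib_ah ibah;
	struct ib_ah_attr attr;
	atomic_t refcount;
};

static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah)
{
	/* recover the rdmavt AH from its embedded ib_ah, as to_iah() did for qib_ah */
	return container_of(ibah, struct rvt_ah, ibah);
}
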
Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 6c8ff10..1fbe308 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -5515,7 +5515,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd) ret = PTR_ERR(ah); else { send_buf->ah = ah; - ibp->smi_ah = to_iah(ah); + ibp->smi_ah = ibah_to_rvtah(ah); ret = 0; } } else { diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 70fc1b2..43f8c49 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -109,7 +109,7 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) ret = PTR_ERR(ah); else { send_buf->ah = ah; - ibp->sm_ah = to_iah(ah); + ibp->sm_ah = ibah_to_rvtah(ah); ret = 0; } } else diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index b0f2dcf..6c023f7 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -439,7 +439,8 @@ static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); + atomic_dec( + &ibah_to_rvtah(wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; } @@ -596,7 +597,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; - if (qib_check_ah(qp->ibqp.device, &attr->ah_attr)) + if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) goto inval; } @@ -604,7 +605,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr->alt_ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; - if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) + if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) goto inval; if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev))) goto inval; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 02e79a8..3900459 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -789,7 +789,7 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); + atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 1d9d037f..6dc20ca 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -77,7 +77,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) goto drop; } - ah_attr = &to_iah(swqe->ud_wr.ah)->attr; + ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr; ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -279,7 +279,7 @@ int qib_make_ud_req(struct rvt_qp *qp) /* Construct the header. 
*/ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &to_iah(wqe->ud_wr.ah)->attr; + ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { if (ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE)) this_cpu_inc(ibp->pmastats->n_multicast_xmit); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 5c0e76c..89fe514 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -443,7 +443,7 @@ static int qib_post_one_send(struct rvt_qp *qp, struct ib_send_wr *wr, qp->port_num - 1)->ibmtu) { goto bail_inval_free; } else { - atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount); + atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); avoid_schedule = 1; } wqe->ssn = qp->s_ssn++; @@ -1771,74 +1771,10 @@ static int qib_query_gid(struct ib_device *ibdev, u8 port, int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) { - /* A multicast address requires a GRH (see ch. 8.4.1). */ - if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) && - ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) && - !(ah_attr->ah_flags & IB_AH_GRH)) - goto bail; - if ((ah_attr->ah_flags & IB_AH_GRH) && - ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT) - goto bail; - if (ah_attr->dlid == 0) - goto bail; - if (ah_attr->port_num < 1 || - ah_attr->port_num > ibdev->phys_port_cnt) - goto bail; - if (ah_attr->static_rate != IB_RATE_PORT_CURRENT && - ib_rate_to_mult(ah_attr->static_rate) < 0) - goto bail; if (ah_attr->sl > 15) - goto bail; - return 0; -bail: - return -EINVAL; -} - -/** - * qib_create_ah - create an address handle - * @pd: the protection domain - * @ah_attr: the attributes of the AH - * - * This may be called from interrupt context. - */ -static struct ib_ah *qib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) -{ - struct qib_ah *ah; - struct ib_ah *ret; - struct qib_ibdev *dev = to_idev(pd->device); - unsigned long flags; - - if (qib_check_ah(pd->device, ah_attr)) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - ah = kmalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - spin_lock_irqsave(&dev->n_ahs_lock, flags); - if (dev->n_ahs_allocated == ib_qib_max_ahs) { - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - kfree(ah); - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - dev->n_ahs_allocated++; - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - - /* ib_create_ah() will initialize ah->ibah. */ - ah->attr = *ah_attr; - atomic_set(&ah->refcount, 0); - - ret = &ah->ibah; + return -EINVAL; -bail: - return ret; + return 0; } struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) @@ -1859,51 +1795,6 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) } /** - * qib_destroy_ah - destroy an address handle - * @ibah: the AH to destroy - * - * This may be called from interrupt context. 
- */ -static int qib_destroy_ah(struct ib_ah *ibah) -{ - struct qib_ibdev *dev = to_idev(ibah->device); - struct qib_ah *ah = to_iah(ibah); - unsigned long flags; - - if (atomic_read(&ah->refcount) != 0) - return -EBUSY; - - spin_lock_irqsave(&dev->n_ahs_lock, flags); - dev->n_ahs_allocated--; - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - - kfree(ah); - - return 0; -} - -static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) -{ - struct qib_ah *ah = to_iah(ibah); - - if (qib_check_ah(ibah->device, ah_attr)) - return -EINVAL; - - ah->attr = *ah_attr; - - return 0; -} - -static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) -{ - struct qib_ah *ah = to_iah(ibah); - - *ah_attr = ah->attr; - - return 0; -} - -/** * qib_get_npkeys - return the size of the PKEY table for context 0 * @dd: the qlogic_ib device */ @@ -2073,7 +1964,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* Only need to initialize non-zero fields. */ spin_lock_init(&dev->qpt_lock); - spin_lock_init(&dev->n_ahs_lock); spin_lock_init(&dev->n_cqs_lock); spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); @@ -2175,10 +2065,10 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->dealloc_ucontext = qib_dealloc_ucontext; ibdev->alloc_pd = NULL; ibdev->dealloc_pd = NULL; - ibdev->create_ah = qib_create_ah; - ibdev->destroy_ah = qib_destroy_ah; - ibdev->modify_ah = qib_modify_ah; - ibdev->query_ah = qib_query_ah; + ibdev->create_ah = NULL; + ibdev->destroy_ah = NULL; + ibdev->modify_ah = NULL; + ibdev->query_ah = NULL; ibdev->create_srq = qib_create_srq; ibdev->modify_srq = qib_modify_srq; ibdev->query_srq = qib_query_srq; @@ -2220,7 +2110,9 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files; dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; + dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; dd->verbs_dev.rdi.dparms.props.max_pd = ib_qib_max_pds; + dd->verbs_dev.rdi.dparms.props.max_ah = ib_qib_max_ahs; dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | RVT_FLAG_CQ_INIT_DRIVER); dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index c7399ff..a9d7f0a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -222,13 +222,6 @@ struct qib_mcast { int n_attached; }; -/* Address Handle */ -struct qib_ah { - struct ib_ah ibah; - struct ib_ah_attr attr; - atomic_t refcount; -}; - /* * This structure is used to contain the head pointer, tail pointer, * and completion queue entries as a single memory allocation so @@ -419,8 +412,8 @@ struct qib_ibport { struct rvt_qp __rcu *qp0; struct rvt_qp __rcu *qp1; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ - struct qib_ah *sm_ah; - struct qib_ah *smi_ah; + struct rvt_ah *sm_ah; + struct rvt_ah *smi_ah; struct rb_root mcast_tree; spinlock_t lock; /* protect changes in this struct */ @@ -511,10 +504,6 @@ struct qib_ibdev { u32 n_piowait; u32 n_txwait; - u32 n_pds_allocated; /* number of PDs allocated for device */ - spinlock_t n_pds_lock; - u32 n_ahs_allocated; /* number of AHs allocated for device */ - spinlock_t n_ahs_lock; u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; u32 n_qps_allocated; /* number of QPs allocated for device */ @@ -545,11 +534,6 @@ struct qib_verbs_counters { u32 
vl15_dropped; }; -static inline struct qib_ah *to_iah(struct ib_ah *ibah) -{ - return container_of(ibah, struct qib_ah, ibah); -} - static inline struct qib_cq *to_icq(struct ib_cq *ibcq) { return container_of(ibcq, struct qib_cq, ibcq); -- cgit v0.10.2 From 894c727b6af8cfd29fcb2b9db4520533d9e8d6a5 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:46:17 -0800 Subject: IB/qib: Remove srq from qib Remove srq from qib now that it has been moved into rdmavt. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 6c023f7..1f63835 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1046,7 +1046,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, sz = sizeof(*qp); sg_list_sz = 0; if (init_attr->srq) { - struct qib_srq *srq = to_isrq(init_attr->srq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); if (srq->rq.max_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 3900459..682447e 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -141,14 +141,14 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only) unsigned long flags; struct rvt_rq *rq; struct rvt_rwq *wq; - struct qib_srq *srq; + struct rvt_srq *srq; struct rvt_rwqe *wqe; void (*handler)(struct ib_event *, void *); u32 tail; int ret; if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); + srq = ibsrq_to_rvtsrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; rq = &srq->rq; } else { diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c index 8547263..e9dfa30 100644 --- a/drivers/infiniband/hw/qib/qib_srq.c +++ b/drivers/infiniband/hw/qib/qib_srq.c @@ -48,7 +48,7 @@ int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - struct qib_srq *srq = to_isrq(ibsrq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_rwq *wq; unsigned long flags; int ret; @@ -103,7 +103,7 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, struct ib_udata *udata) { struct qib_ibdev *dev = to_idev(ibpd->device); - struct qib_srq *srq; + struct rvt_srq *srq; u32 sz; struct ib_srq *ret; @@ -212,7 +212,7 @@ int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { - struct qib_srq *srq = to_isrq(ibsrq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_rwq *wq; int ret = 0; @@ -350,7 +350,7 @@ bail: int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { - struct qib_srq *srq = to_isrq(ibsrq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); attr->max_wr = srq->rq.size - 1; attr->max_sge = srq->rq.max_sge; @@ -364,7 +364,7 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) */ int qib_destroy_srq(struct ib_srq *ibsrq) { - struct qib_srq *srq = to_isrq(ibsrq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct qib_ibdev *dev = to_idev(ibsrq->device); spin_lock(&dev->n_srqs_lock); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a9d7f0a..00dd2ad 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -251,14 +251,6 @@ struct qib_cq { struct rvt_mmap_info *ip; }; -struct qib_srq { - struct ib_srq ibsrq; - struct 
rvt_rq rq; - struct rvt_mmap_info *ip; - /* send signal when number of RWQEs < limit */ - u32 limit; -}; - /* * qib specific data structure that will be hidden from rvt after the queue pair * is made common. @@ -539,11 +531,6 @@ static inline struct qib_cq *to_icq(struct ib_cq *ibcq) return container_of(ibcq, struct qib_cq, ibcq); } -static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq) -{ - return container_of(ibsrq, struct qib_srq, ibsrq); -} - static inline struct rvt_qp *to_iqp(struct ib_qp *ibqp) { return container_of(ibqp, struct rvt_qp, ibqp); -- cgit v0.10.2 From 6c43cf4b4851704de28cbd5fdfc55275744153fa Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:50:05 -0800 Subject: IB/rdmavt: Add IB user context allocation and de-alloction functions Adding IB user context alloc and dealloc functions to rdmavt so that the drivers that use rdmavt can use these functions instead of defining their own functions. Reviewed-by: Ira Weiny Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 18b5f43..df2df36 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -189,6 +189,16 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port, return -EOPNOTSUPP; } +struct rvt_ucontext { + struct ib_ucontext ibucontext; +}; + +static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext + *ibucontext) +{ + return container_of(ibucontext, struct rvt_ucontext, ibucontext); +} + /** * rvt_alloc_ucontext - Allocate a user context * @ibdev: Vers IB dev @@ -197,7 +207,12 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port, static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_ucontext *context; + + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + return &context->ibucontext; } /** @@ -206,7 +221,8 @@ static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev, */ static int rvt_dealloc_ucontext(struct ib_ucontext *context) { - return -EOPNOTSUPP; + kfree(to_iucontext(context)); + return 0; } static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, -- cgit v0.10.2 From 050eb7fbe0ff2bcd95833ff180337116d5907483 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:11 -0800 Subject: IB/rdmavt: Add R and S flags for queue pairs Use the flags originally provided for hfi1 in the rdmavt driver. These will be made available to drivers in the qp header file. Reviewed-by: Harish Chegondi Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e6a7d17..1aa8b5b 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -48,6 +48,86 @@ * */ +#include +/* + * Atomic bit definitions for r_aflags. + */ +#define RVT_R_WRID_VALID 0 +#define RVT_R_REWIND_SGE 1 + +/* + * Bit definitions for r_flags. + */ +#define RVT_R_REUSE_SGE 0x01 +#define RVT_R_RDMAR_SEQ 0x02 +#define RVT_R_RSP_NAK 0x04 +#define RVT_R_RSP_SEND 0x08 +#define RVT_R_COMM_EST 0x10 + +/* + * Bit definitions for s_flags. 
+ * + * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled + * RVT_S_BUSY - send tasklet is processing the QP + * RVT_S_TIMER - the RC retry timer is active + * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics + * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete + * before processing the next SWQE + * RVT_S_WAIT_RNR - waiting for RNR timeout + * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA + * RVT_S_WAIT_PIO - waiting for a send buffer to be available + * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available + * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available + * RVT_S_WAIT_KMEM - waiting for kernel memory to be available + * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue + * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests + * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK + * RVT_S_ECN - a BECN was queued to the send engine + */ +#define RVT_S_SIGNAL_REQ_WR 0x0001 +#define RVT_S_BUSY 0x0002 +#define RVT_S_TIMER 0x0004 +#define RVT_S_RESP_PENDING 0x0008 +#define RVT_S_ACK_PENDING 0x0010 +#define RVT_S_WAIT_FENCE 0x0020 +#define RVT_S_WAIT_RDMAR 0x0040 +#define RVT_S_WAIT_RNR 0x0080 +#define RVT_S_WAIT_SSN_CREDIT 0x0100 +#define RVT_S_WAIT_DMA 0x0200 +#define RVT_S_WAIT_PIO 0x0400 +#define RVT_S_WAIT_TX 0x0800 +#define RVT_S_WAIT_DMA_DESC 0x1000 +#define RVT_S_WAIT_KMEM 0x2000 +#define RVT_S_WAIT_PSN 0x4000 +#define RVT_S_WAIT_ACK 0x8000 +#define RVT_S_SEND_ONE 0x10000 +#define RVT_S_UNLIMITED_CREDIT 0x20000 +#define RVT_S_AHG_VALID 0x40000 +#define RVT_S_AHG_CLEAR 0x80000 +#define RVT_S_ECN 0x100000 + +/* + * Wait flags that would prevent any packet type from being sent. + */ +#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ + RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) + +/* + * Wait flags that would prevent send work requests from making progress. + */ +#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \ + RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \ + RVT_S_WAIT_PSN | RVT_S_WAIT_ACK) + +#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) + +/* Number of bits to pay attention to in the opcode for checking qp type */ +#define RVT_OPCODE_QP_MASK 0xE0 + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored -- cgit v0.10.2 From 515667f8f8b48bdbcad61c5681291cb970e36ac3 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:17 -0800 Subject: IB/rdmavt: Add create queue pair functionality Add create queue pair verbs call as well as supporting functions. 
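The bulk of the hunk below is the QPN allocator; it treats the QPN space as an array of per-page bitmaps, so a QPN decomposes into a map index plus a bit offset. A minimal sketch of that decomposition (qpn_locate() is a hypothetical helper for illustration only; the real code open-codes this inside alloc_qpn()/free_qpn(), and the 32768 bits-per-page figure assumes 4 KiB pages):

static inline void qpn_locate(struct rvt_qpn_table *qpt, u32 qpn,
			      struct rvt_qpn_map **map, u32 *bit)
{
	/* e.g. with RVT_BITS_PER_PAGE == 32768, QPN 70000 -> map[2], bit 4464 */
	*map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
	*bit = qpn & RVT_BITS_PER_PAGE_MASK;
}
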
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 17dd6ab..7d1f02e 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -47,8 +47,11 @@ #include #include -#include "vt.h" +#include +#include +#include #include "qp.h" +#include "vt.h" static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) { @@ -151,7 +154,10 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) * If driver is not doing any QP allocation then make sure it is * providing the necessary QP functions. */ - if (!rdi->driver_f.free_all_qps) + if (!rdi->driver_f.free_all_qps || + !rdi->driver_f.qp_priv_alloc || + !rdi->driver_f.qp_priv_free || + !rdi->driver_f.notify_qp_reset) return -EINVAL; /* allocate parent object */ @@ -178,7 +184,9 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table)) goto fail_table; - return ret; + spin_lock_init(&rdi->n_qps_lock); + + return 0; fail_table: kfree(rdi->qp_dev->qp_table); @@ -197,31 +205,29 @@ no_qp_table: * There should not be any QPs still in use. * Free memory for table. */ -static unsigned free_all_qps(struct rvt_dev_info *rdi) +static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) { unsigned long flags; struct rvt_qp *qp; unsigned n, qp_inuse = 0; spinlock_t *ql; /* work around too long line below */ - rdi->driver_f.free_all_qps(rdi); + if (rdi->driver_f.free_all_qps) + qp_inuse = rdi->driver_f.free_all_qps(rdi); if (!rdi->qp_dev) - return 0; + return qp_inuse; ql = &rdi->qp_dev->qpt_lock; - spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + spin_lock_irqsave(ql, flags); for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], lockdep_is_held(ql)); RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); - qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql)); - while (qp) { + + for (; qp; qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql))) qp_inuse++; - qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql)); - } } spin_unlock_irqrestore(ql, flags); synchronize_rcu(); @@ -230,26 +236,190 @@ static unsigned free_all_qps(struct rvt_dev_info *rdi) void rvt_qp_exit(struct rvt_dev_info *rdi) { - u32 qps_inuse = free_all_qps(rdi); + u32 qps_inuse = rvt_free_all_qps(rdi); - qps_inuse = free_all_qps(rdi); if (qps_inuse) rvt_pr_err(rdi, "QP memory leak! %u still in use\n", qps_inuse); if (!rdi->qp_dev) return; + if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) + return; /* driver did the qp init so nothing else to do */ + kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); kfree(rdi->qp_dev); } +static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, + struct rvt_qpn_map *map, unsigned off) +{ + return (map - qpt->map) * RVT_BITS_PER_PAGE + off; +} + +/* + * Allocate the next available QPN or + * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 
+ */ +static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port) +{ + u32 i, offset, max_scan, qpn; + struct rvt_qpn_map *map; + u32 ret; + + if (rdi->driver_f.alloc_qpn) + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port); + + if (type == IB_QPT_SMI || type == IB_QPT_GSI) { + unsigned n; + + ret = type == IB_QPT_GSI; + n = 1 << (ret + 2 * (port - 1)); + spin_lock(&qpt->lock); + if (qpt->flags & n) + ret = -EINVAL; + else + qpt->flags |= n; + spin_unlock(&qpt->lock); + goto bail; + } + + qpn = qpt->last + qpt->incr; + if (qpn >= RVT_QPN_MAX) + qpn = qpt->incr | ((qpt->last & 1) ^ 1); + /* offset carries bit 0 */ + offset = qpn & RVT_BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; + max_scan = qpt->nmaps - !offset; + for (i = 0;;) { + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) + break; + } + do { + if (!test_and_set_bit(offset, map->page)) { + qpt->last = qpn; + ret = qpn; + goto bail; + } + offset += qpt->incr; + /* + * This qpn might be bogus if offset >= BITS_PER_PAGE. + * That is OK. It gets re-assigned below + */ + qpn = mk_qpn(qpt, map, offset); + } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX); + /* + * In order to keep the number of pages allocated to a + * minimum, we scan the all existing pages before increasing + * the size of the bitmap table. + */ + if (++i > max_scan) { + if (qpt->nmaps == RVT_QPNMAP_ENTRIES) + break; + map = &qpt->map[qpt->nmaps++]; + /* start at incr with current bit 0 */ + offset = qpt->incr | (offset & 1); + } else if (map < &qpt->map[qpt->nmaps]) { + ++map; + /* start at incr with current bit 0 */ + offset = qpt->incr | (offset & 1); + } else { + map = &qpt->map[0]; + /* wrap to first map page, invert bit 0 */ + offset = qpt->incr | ((offset & 1) ^ 1); + } + /* there can be no bits at shift and below */ + WARN_ON(offset & (rdi->dparms.qos_shift - 1)); + qpn = mk_qpn(qpt, map, offset); + } + + ret = -ENOMEM; + +bail: + return ret; +} + +static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} + +/** + * reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + */ +static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) +{ + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; + + /* + * Let driver do anything it needs to for a new/reset qp + */ + rdi->driver_f.notify_qp_reset(qp); + + qp->s_flags &= RVT_S_SIGNAL_REQ_WR; + qp->s_hdrwords = 0; + qp->s_wqe = NULL; + qp->s_draining = 0; + qp->s_next_psn = 0; + qp->s_last_psn = 0; + qp->s_sending_psn = 0; + qp->s_sending_hpsn = 0; + qp->s_psn = 0; + qp->r_psn = 0; + qp->r_msn = 0; + if (type == IB_QPT_RC) { + qp->s_state = IB_OPCODE_RC_SEND_LAST; + qp->r_state = IB_OPCODE_RC_SEND_LAST; + } else { + qp->s_state = IB_OPCODE_UC_SEND_LAST; + qp->r_state = IB_OPCODE_UC_SEND_LAST; + } + qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; + qp->r_nak_state = 0; + qp->r_aflags = 0; + qp->r_flags = 0; + qp->s_head = 0; + qp->s_tail = 0; + qp->s_cur = 0; + qp->s_acked = 0; + qp->s_last = 0; + qp->s_ssn = 1; + qp->s_lsn = 0; + qp->s_mig_state = IB_MIG_MIGRATED; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } + 
qp->r_sge.num_sge = 0; +} + /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair * @udata: user data for libibverbs.so * + * Queue pair creation is mostly an rvt issue. However, drivers have their own + * unique idea of what queue pair numbers mean. For instance there is a reserved + * range for PSM. + * * Returns the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. @@ -258,15 +428,226 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { + struct rvt_qp *qp; + int err; + struct rvt_swqe *swq = NULL; + size_t sz; + size_t sg_list_sz; + struct ib_qp *ret = ERR_PTR(-ENOMEM); + struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); + void *priv = NULL; + + if (!rdi) + return ERR_PTR(-EINVAL); + + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || + init_attr->create_flags) + return ERR_PTR(-EINVAL); + + /* Check receive queue parameters if no SRQ is specified. */ + if (!init_attr->srq) { + if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || + init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) + return ERR_PTR(-EINVAL); + + if (init_attr->cap.max_send_sge + + init_attr->cap.max_send_wr + + init_attr->cap.max_recv_sge + + init_attr->cap.max_recv_wr == 0) + return ERR_PTR(-EINVAL); + } + + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (init_attr->port_num == 0 || + init_attr->port_num > ibpd->device->phys_port_cnt) + return ERR_PTR(-EINVAL); + case IB_QPT_UC: + case IB_QPT_RC: + case IB_QPT_UD: + sz = sizeof(struct rvt_sge) * + init_attr->cap.max_send_sge + + sizeof(struct rvt_swqe); + swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (!swq) + return ERR_PTR(-ENOMEM); + + sz = sizeof(*qp); + sg_list_sz = 0; + if (init_attr->srq) { + struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); + + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + if (!qp) + goto bail_swq; + + RCU_INIT_POINTER(qp->next, NULL); + + /* + * Driver needs to set up it's private QP structure and do any + * initialization that is needed. + */ + priv = rdi->driver_f.qp_priv_alloc(rdi, qp); + if (!priv) + goto bail_qp; + qp->priv = priv; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + if (init_attr->srq) { + sz = 0; + } else { + qp->r_rq.size = init_attr->cap.max_recv_wr + 1; + qp->r_rq.max_sge = init_attr->cap.max_recv_sge; + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + + sizeof(struct rvt_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); + if (!qp->r_rq.wq) + goto bail_driver_priv; + } + + /* + * ib_create_qp() will initialize qp->ibqp + * except for qp->ibqp.qp_num. 
+ */ + spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_lock); + spin_lock_init(&qp->r_rq.lock); + atomic_set(&qp->refcount, 0); + init_waitqueue_head(&qp->wait); + init_timer(&qp->s_timer); + qp->s_timer.data = (unsigned long)qp; + INIT_LIST_HEAD(&qp->rspwait); + qp->state = IB_QPS_RESET; + qp->s_wq = swq; + qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_max_sge = init_attr->cap.max_send_sge; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = RVT_S_SIGNAL_REQ_WR; + + err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, + init_attr->qp_type, + init_attr->port_num); + if (err < 0) { + ret = ERR_PTR(err); + goto bail_rq_wq; + } + qp->ibqp.qp_num = err; + qp->port_num = init_attr->port_num; + reset_qp(rdi, qp, init_attr->qp_type); + break; + + default: + /* Don't support raw QPs */ + return ERR_PTR(-EINVAL); + } + + init_attr->cap.max_inline_data = 0; + /* - * Queue pair creation is mostly an rvt issue. However, drivers have - * their own unique idea of what queue pare numbers mean. For instance - * there is a reserved range for PSM. - * - * VI-DRIVER-API: make_qpn() - * Returns a valid QPN for verbs to use + * Return the address of the RWQ as the offset to mmap. + * See hfi1_mmap() for details. */ - return ERR_PTR(-EOPNOTSUPP); + if (udata && udata->outlen >= sizeof(__u64)) { + if (!qp->r_rq.wq) { + __u64 offset = 0; + + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_qpn; + } + } else { + u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; + + qp->ip = rvt_create_mmap_info(rdi, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_qpn; + } + + err = ib_copy_to_udata(udata, &qp->ip->offset, + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + } + + spin_lock(&rdi->n_qps_lock); + if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { + spin_unlock(&rdi->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + rdi->n_qps_allocated++; + spin_unlock(&rdi->n_qps_lock); + + if (qp->ip) { + spin_lock_irq(&rdi->pending_lock); + list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + } + + ret = &qp->ibqp; + + /* + * We have our QP and its good, now keep track of what types of opcodes + * can be processed on this QP. We do this by keeping track of what the + * 3 high order bits of the opcode are. 
+ */ + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + case IB_QPT_RC: + qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + case IB_QPT_UC: + qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + default: + ret = ERR_PTR(-EINVAL); + goto bail_ip; + } + + return ret; + +bail_ip: + kref_put(&qp->ip->ref, rvt_release_mmap_info); + +bail_qpn: + free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); + +bail_rq_wq: + vfree(qp->r_rq.wq); + +bail_driver_priv: + rdi->driver_f.qp_priv_free(rdi, qp); + +bail_qp: + kfree(qp); + +bail_swq: + vfree(swq); + + return ret; } /** diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index df2df36..e75eb3d 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -362,6 +362,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) ib_unregister_device(&rdi->ibdev); rvt_mr_exit(rdi); + rvt_qp_exit(rdi); } EXPORT_SYMBOL(rvt_unregister_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3a78f20..3bdeac7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -222,7 +222,10 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - void (*free_all_qps)(struct rvt_dev_info *rdi); + unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void (*notify_qp_reset)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -230,6 +233,8 @@ struct rvt_driver_provided { int (*check_ah)(struct ib_device *, struct ib_ah_attr *); void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); + int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port); }; struct rvt_dev_info { @@ -262,7 +267,10 @@ struct rvt_dev_info { int flags; struct rvt_ibport **ports; + /* QP */ struct rvt_qp_ibdev *qp_dev; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; /* keep track of number of qps */ /* memory maps */ struct list_head pending_mmaps; -- cgit v0.10.2 From 5a9cf6f27e36ece71cc8a192a4ca39b62a460807 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:24 -0800 Subject: IB/rdmavt: Export reset_qp in rdmavt Until all queue pair functionality is moved to rdmavt we need to provide access to the reset function. This is only temporary and will be reverted back to a static, non exported function in the end. 
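An illustrative caller in a driver that still owns part of its QP reset path while the migration to rdmavt is in progress; my_quiesce_and_reset() and the quiesce step are assumptions, only rvt_reset_qp() and its argument list come from this patch:

#include <rdma/rdma_vt.h>

static void my_quiesce_and_reset(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	/* driver-specific quiesce of the send engine would go here */
	rvt_reset_qp(rdi, qp, qp->ibqp.qp_type);
}

Once the remaining QP logic moves into rdmavt, such callers go away and the symbol can become static again, as the commit message notes.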
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7d1f02e..44485ad 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -357,8 +357,8 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) * @qp: the QP to reset * @type: the QP type */ -static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) +void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) { qp->remote_qpn = 0; qp->qkey = 0; @@ -409,6 +409,7 @@ static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } qp->r_sge.num_sge = 0; } +EXPORT_SYMBOL(rvt_reset_qp); /** * rvt_create_qp - create a queue pair for a device @@ -543,7 +544,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; - reset_qp(rdi, qp, init_attr->qp_type); + rvt_reset_qp(rdi, qp, init_attr->qp_type); break; default: diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3bdeac7..e412e67 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -338,4 +338,7 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); +/* Temporary export */ +void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type); #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1aa8b5b..bce0a03 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -48,6 +48,7 @@ * */ +#include #include /* * Atomic bit definitions for r_aflags. -- cgit v0.10.2 From fef2efd6b4951148cc8dd1df7b3e1ff2f13dd6df Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:50:30 -0800 Subject: IB/rdmavt: Allow reserving just one qpn qib needs to reserve only one qpn for non-verbs stuff. Also fixed the for loop to reserve the end qpn. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 44485ad..ee19eae 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -79,7 +79,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) struct rvt_qpn_map *map; int ret = 0; - if (!(rdi->dparms.qpn_res_end > rdi->dparms.qpn_res_start)) + if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start)) return -EINVAL; spin_lock_init(&qpt->lock); @@ -105,7 +105,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n", rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); - for (i = rdi->dparms.qpn_res_start; i < rdi->dparms.qpn_res_end; i++) { + for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) { if (!map->page) { get_map_page(qpt, map); if (!map->page) { -- cgit v0.10.2 From feaeb6e26fd15f6531e28f2900e0b59705bfc3d4 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:50:36 -0800 Subject: IB/rdmavt: Add support for rvt_query_device function With this commit, the drivers using rdmavt need not define query_device function. 
But they should fill in the IB device attributes structure rvt_dev_info.dparms.props. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e75eb3d..f2b6438 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -68,10 +68,15 @@ static int rvt_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + + if (uhw->inlen || uhw->outlen) + return -EINVAL; /* - * Return rvt_dev_info.props contents + * Return rvt_dev_info.dparms.props contents */ - return -EOPNOTSUPP; + *props = rdi->dparms.props; + return 0; } static int rvt_modify_device(struct ib_device *device,
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e412e67..725778a 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -245,7 +245,10 @@ struct rvt_dev_info { * allocating space for this structure. * * The driver will also be responsible for filling in certain members of - * dparms.props + * dparms.props. The driver needs to fill in dparms exactly as it would + * want values reported to a ULP. This will be returned to the caller + * in rdmavt's device. The driver should also therefore refrain from + * modifying this directly after registration with rdmavt. */ /* Driver specific properties */ -- cgit v0.10.2
From d2b8d4da1ca5052b72e043d2ce68157abf3f2d24 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 22 Jan 2016 12:50:43 -0800 Subject: IB/rdmavt: Support creating qps with GFP_NOIO flag The current code is problematic when QP creation over ipoib is used to support NFS and NFS needs to do IO for paging purposes. In that case, the GFP_KERNEL allocation within create_qp causes a deadlock in tight memory situations. This fix adds support for creating queue pairs with the GFP_NOIO flag for connected mode only, so that queue pair creation fails cleanly in those situations. This was previously fixed in qib but needed to be ported to hfi1. This patch handles that for both hardware drivers in the new rdmavt common layer. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ee19eae..43346a7 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -53,9 +53,11 @@ #include "qp.h" #include "vt.h" -static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) +static void get_map_page(struct rvt_qpn_table *qpt, + struct rvt_qpn_map *map, + gfp_t gfp) { - unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long page = get_zeroed_page(gfp); /* * Free the page if someone raced with us installing it. @@ -107,7 +109,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) { if (!map->page) { - get_map_page(qpt, map); + get_map_page(qpt, map, GFP_KERNEL); if (!map->page) { ret = -ENOMEM; break; @@ -263,14 +265,15 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
*/ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port) + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port); + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port, + GFP_KERNEL); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -295,7 +298,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map); + get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } @@ -437,15 +440,25 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; + gfp_t gfp; if (!rdi) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags) + init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); + /* GFP_NOIO is applicable to RC QP's only */ + + if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && + init_attr->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? + GFP_NOIO : GFP_KERNEL; + /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || @@ -471,7 +484,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (gfp == GFP_NOIO) + swq = __vmalloc( + (init_attr->cap.max_send_wr + 1) * sz, + gfp, PAGE_KERNEL); + else + swq = vmalloc( + (init_attr->cap.max_send_wr + 1) * sz); if (!swq) return ERR_PTR(-ENOMEM); @@ -486,7 +505,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) goto bail_swq; @@ -496,7 +515,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * Driver needs to set up it's private QP structure and do any * initialization that is needed. 
*/ - priv = rdi->driver_f.qp_priv_alloc(rdi, qp); + priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); if (!priv) goto bail_qp; qp->priv = priv; @@ -510,8 +529,19 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); + if (udata) + qp->r_rq.wq = vmalloc_user( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); + else if (gfp == GFP_NOIO) + qp->r_rq.wq = __vmalloc( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz, + gfp, PAGE_KERNEL); + else + qp->r_rq.wq = vmalloc( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); if (!qp->r_rq.wq) goto bail_driver_priv; } @@ -537,7 +567,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 725778a..70a9596 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -223,7 +223,8 @@ struct rvt_driver_provided { const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); unsigned (*free_all_qps)(struct rvt_dev_info *rdi); - void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); @@ -234,7 +235,7 @@ struct rvt_driver_provided { void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port); + enum ib_qp_type type, u8 port, gfp_t gfp); }; struct rvt_dev_info { -- cgit v0.10.2 From f24a6d4887668cc2925b1e908c72b96a2604a814 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:02 -0800 Subject: IB/qib: Remove ibport and use rdmavt version Remove several ibport members from qib and use the rdmavt version. rc_acks, rc_qacks, and rc_delayed_comp are defined as per CPU variables in rdmavt. Add support for these rdmavt per CPU variables which were not per cpu variables in qib ibport structure. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index eafdee9..e8b239c 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -379,7 +379,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, /* Check for valid receive state. */ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; goto unlock; } @@ -399,7 +399,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff = qib_cmp24(psn, qp->r_psn); if (!qp->r_nak_state && diff >= 0) { - ibp->n_rc_seqnak++; + ibp->rvp.n_rc_seqnak++; qp->r_nak_state = IB_NAK_PSN_ERROR; /* Use the expected PSN. 
*/ diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 4b927809..a3733f2 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -2956,13 +2956,13 @@ static void pma_6120_timer(unsigned long data) struct qib_ibport *ibp = &ppd->ibport_data; unsigned long flags; - spin_lock_irqsave(&ibp->lock, flags); + spin_lock_irqsave(&ibp->rvp.lock, flags); if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED) { cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; qib_snapshot_counters(ppd, &cs->sword, &cs->rword, &cs->spkts, &cs->rpkts, &cs->xmit_wait); mod_timer(&cs->pma_timer, - jiffies + usecs_to_jiffies(ibp->pma_sample_interval)); + jiffies + usecs_to_jiffies(ibp->rvp.pma_sample_interval)); } else if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { u64 ta, tb, tc, td, te; @@ -2975,11 +2975,11 @@ static void pma_6120_timer(unsigned long data) cs->rpkts = td - cs->rpkts; cs->xmit_wait = te - cs->xmit_wait; } - spin_unlock_irqrestore(&ibp->lock, flags); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); } /* - * Note that the caller has the ibp->lock held. + * Note that the caller has the ibp->rvp.lock held. */ static void qib_set_cntr_6120_sample(struct qib_pportdata *ppd, u32 intv, u32 start) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 1fbe308..ca28c19 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -5497,7 +5497,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd) unsigned delay; int ret; - agent = ibp->send_agent; + agent = ibp->rvp.send_agent; if (!agent) goto retry; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 47190f1..5087a1f 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -245,6 +245,13 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, alloc_percpu(struct qib_pma_counters); if (!ppd->ibport_data.pmastats) return -ENOMEM; + ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64); + ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64); + ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64); + if (!(ppd->ibport_data.rvp.rc_acks) || + !(ppd->ibport_data.rvp.rc_qacks) || + !(ppd->ibport_data.rvp.rc_delayed_comp)) + return -ENOMEM; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) goto bail; @@ -632,6 +639,9 @@ wq_error: static void qib_free_pportdata(struct qib_pportdata *ppd) { free_percpu(ppd->ibport_data.pmastats); + free_percpu(ppd->ibport_data.rvp.rc_acks); + free_percpu(ppd->ibport_data.rvp.rc_qacks); + free_percpu(ppd->ibport_data.rvp.rc_delayed_comp); ppd->ibport_data.pmastats = NULL; } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 43f8c49..3e8dde2 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -70,7 +70,7 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) unsigned long flags; unsigned long timeout; - agent = ibp->send_agent; + agent = ibp->rvp.send_agent; if (!agent) return; @@ -79,7 +79,8 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) return; /* o14-2 */ - if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout)) + if (ibp->rvp.trap_timeout && + time_before(jiffies, ibp->rvp.trap_timeout)) return; send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, @@ -93,18 +94,18 @@ static void 
qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; smp->class_version = 1; smp->method = IB_MGMT_METHOD_TRAP; - ibp->tid++; - smp->tid = cpu_to_be64(ibp->tid); + ibp->rvp.tid++; + smp->tid = cpu_to_be64(ibp->rvp.tid); smp->attr_id = IB_SMP_ATTR_NOTICE; /* o14-1: smp->mkey = 0; */ memcpy(smp->data, data, len); - spin_lock_irqsave(&ibp->lock, flags); + spin_lock_irqsave(&ibp->rvp.lock, flags); if (!ibp->sm_ah) { - if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { + if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; - ah = qib_create_qp0_ah(ibp, ibp->sm_lid); + ah = qib_create_qp0_ah(ibp, ibp->rvp.sm_lid); if (IS_ERR(ah)) ret = PTR_ERR(ah); else { @@ -118,17 +119,17 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) send_buf->ah = &ibp->sm_ah->ibah; ret = 0; } - spin_unlock_irqrestore(&ibp->lock, flags); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); if (!ret) ret = ib_post_send_mad(send_buf, NULL); if (!ret) { /* 4.096 usec. */ - timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000; - ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout); + timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000; + ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout); } else { ib_free_send_mad(send_buf); - ibp->trap_timeout = 0; + ibp->rvp.trap_timeout = 0; } } @@ -141,10 +142,10 @@ void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, struct ib_mad_notice_attr data; if (trap_num == IB_NOTICE_TRAP_BAD_PKEY) - ibp->pkey_violations++; + ibp->rvp.pkey_violations++; else - ibp->qkey_violations++; - ibp->n_pkt_drops++; + ibp->rvp.qkey_violations++; + ibp->rvp.n_pkt_drops++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; @@ -217,8 +218,8 @@ void qib_cap_mask_chg(struct qib_ibport *ibp) data.toggle_count = 0; memset(&data.details, 0, sizeof(data.details)); data.details.ntc_144.lid = data.issuer_lid; - data.details.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags); - + data.details.ntc_144.new_cap_mask = + cpu_to_be32(ibp->rvp.port_cap_flags); qib_send_trap(ibp, &data, sizeof(data)); } @@ -409,37 +410,38 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) int ret = 0; /* Is the mkey in the process of expiring? */ - if (ibp->mkey_lease_timeout && - time_after_eq(jiffies, ibp->mkey_lease_timeout)) { + if (ibp->rvp.mkey_lease_timeout && + time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) { /* Clear timeout and mkey protection field. 
*/ - ibp->mkey_lease_timeout = 0; - ibp->mkeyprot = 0; + ibp->rvp.mkey_lease_timeout = 0; + ibp->rvp.mkeyprot = 0; } - if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 || - ibp->mkey == smp->mkey) + if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->rvp.mkey == 0 || + ibp->rvp.mkey == smp->mkey) valid_mkey = 1; /* Unset lease timeout on any valid Get/Set/TrapRepress */ - if (valid_mkey && ibp->mkey_lease_timeout && + if (valid_mkey && ibp->rvp.mkey_lease_timeout && (smp->method == IB_MGMT_METHOD_GET || smp->method == IB_MGMT_METHOD_SET || smp->method == IB_MGMT_METHOD_TRAP_REPRESS)) - ibp->mkey_lease_timeout = 0; + ibp->rvp.mkey_lease_timeout = 0; if (!valid_mkey) { switch (smp->method) { case IB_MGMT_METHOD_GET: /* Bad mkey not a violation below level 2 */ - if (ibp->mkeyprot < 2) + if (ibp->rvp.mkeyprot < 2) break; case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_TRAP_REPRESS: - if (ibp->mkey_violations != 0xFFFF) - ++ibp->mkey_violations; - if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) - ibp->mkey_lease_timeout = jiffies + - ibp->mkey_lease_period * HZ; + if (ibp->rvp.mkey_violations != 0xFFFF) + ++ibp->rvp.mkey_violations; + if (!ibp->rvp.mkey_lease_timeout && + ibp->rvp.mkey_lease_period) + ibp->rvp.mkey_lease_timeout = jiffies + + ibp->rvp.mkey_lease_period * HZ; /* Generate a trap notice. */ qib_bad_mkey(ibp, smp); ret = 1; @@ -489,15 +491,15 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, /* Only return the mkey if the protection field allows it. */ if (!(smp->method == IB_MGMT_METHOD_GET && - ibp->mkey != smp->mkey && - ibp->mkeyprot == 1)) - pip->mkey = ibp->mkey; - pip->gid_prefix = ibp->gid_prefix; + ibp->rvp.mkey != smp->mkey && + ibp->rvp.mkeyprot == 1)) + pip->mkey = ibp->rvp.mkey; + pip->gid_prefix = ibp->rvp.gid_prefix; pip->lid = cpu_to_be16(ppd->lid); - pip->sm_lid = cpu_to_be16(ibp->sm_lid); - pip->cap_mask = cpu_to_be32(ibp->port_cap_flags); + pip->sm_lid = cpu_to_be16(ibp->rvp.sm_lid); + pip->cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); /* pip->diag_code; */ - pip->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period); + pip->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period); pip->local_port_num = port; pip->link_width_enabled = ppd->link_width_enabled; pip->link_width_supported = ppd->link_width_supported; @@ -508,7 +510,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, pip->portphysstate_linkdown = (dd->f_ibphys_portstate(ppd->lastibcstat) << 4) | (get_linkdowndefaultstate(ppd) ? 
1 : 2); - pip->mkeyprot_resv_lmc = (ibp->mkeyprot << 6) | ppd->lmc; + pip->mkeyprot_resv_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc; pip->linkspeedactive_enabled = (ppd->link_speed_active << 4) | ppd->link_speed_enabled; switch (ppd->ibmtu) { @@ -529,9 +531,9 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, mtu = IB_MTU_256; break; } - pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->sm_sl; + pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->rvp.sm_sl; pip->vlcap_inittype = ppd->vls_supported << 4; /* InitType = 0 */ - pip->vl_high_limit = ibp->vl_high_limit; + pip->vl_high_limit = ibp->rvp.vl_high_limit; pip->vl_arb_high_cap = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_CAP); pip->vl_arb_low_cap = @@ -542,20 +544,20 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, /* pip->vlstallcnt_hoqlife; */ pip->operationalvl_pei_peo_fpi_fpo = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OP_VLS) << 4; - pip->mkey_violations = cpu_to_be16(ibp->mkey_violations); + pip->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations); /* P_KeyViolations are counted by hardware. */ - pip->pkey_violations = cpu_to_be16(ibp->pkey_violations); - pip->qkey_violations = cpu_to_be16(ibp->qkey_violations); + pip->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations); + pip->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations); /* Only the hardware GUID is supported for now */ pip->guid_cap = QIB_GUIDS_PER_PORT; - pip->clientrereg_resv_subnetto = ibp->subnet_timeout; + pip->clientrereg_resv_subnetto = ibp->rvp.subnet_timeout; /* 32.768 usec. response time (guessing) */ pip->resv_resptimevalue = 3; pip->localphyerrors_overrunerrors = (get_phyerrthreshold(ppd) << 4) | get_overrunthreshold(ppd); /* pip->max_credit_hint; */ - if (ibp->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) { + if (ibp->rvp.port_cap_flags & IB_PORT_LINK_LATENCY_SUP) { u32 v; v = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKLATENCY); @@ -685,9 +687,9 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, event.device = ibdev; event.element.port_num = port; - ibp->mkey = pip->mkey; - ibp->gid_prefix = pip->gid_prefix; - ibp->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); + ibp->rvp.mkey = pip->mkey; + ibp->rvp.gid_prefix = pip->gid_prefix; + ibp->rvp.mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); lid = be16_to_cpu(pip->lid); /* Must be a valid unicast LID address. */ @@ -708,19 +710,19 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, /* Must be a valid unicast LID address. 
*/ if (smlid == 0 || smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) smp->status |= IB_SMP_INVALID_FIELD; - else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) { - spin_lock_irqsave(&ibp->lock, flags); + else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) { + spin_lock_irqsave(&ibp->rvp.lock, flags); if (ibp->sm_ah) { - if (smlid != ibp->sm_lid) + if (smlid != ibp->rvp.sm_lid) ibp->sm_ah->attr.dlid = smlid; - if (msl != ibp->sm_sl) + if (msl != ibp->rvp.sm_sl) ibp->sm_ah->attr.sl = msl; } - spin_unlock_irqrestore(&ibp->lock, flags); - if (smlid != ibp->sm_lid) - ibp->sm_lid = smlid; - if (msl != ibp->sm_sl) - ibp->sm_sl = msl; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + if (smlid != ibp->rvp.sm_lid) + ibp->rvp.sm_lid = smlid; + if (msl != ibp->rvp.sm_sl) + ibp->rvp.sm_sl = msl; event.event = IB_EVENT_SM_CHANGE; ib_dispatch_event(&event); } @@ -768,10 +770,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, smp->status |= IB_SMP_INVALID_FIELD; } - ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6; - ibp->vl_high_limit = pip->vl_high_limit; + ibp->rvp.mkeyprot = pip->mkeyprot_resv_lmc >> 6; + ibp->rvp.vl_high_limit = pip->vl_high_limit; (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_LIMIT, - ibp->vl_high_limit); + ibp->rvp.vl_high_limit); mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF); if (mtu == -1) @@ -789,13 +791,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, } if (pip->mkey_violations == 0) - ibp->mkey_violations = 0; + ibp->rvp.mkey_violations = 0; if (pip->pkey_violations == 0) - ibp->pkey_violations = 0; + ibp->rvp.pkey_violations = 0; if (pip->qkey_violations == 0) - ibp->qkey_violations = 0; + ibp->rvp.qkey_violations = 0; ore = pip->localphyerrors_overrunerrors; if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF)) @@ -804,7 +806,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, if (set_overrunthreshold(ppd, (ore & 0xF))) smp->status |= IB_SMP_INVALID_FIELD; - ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; + ibp->rvp.subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; /* * Do the port state change now that the other link parameters @@ -1062,7 +1064,7 @@ static int subn_get_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev, memset(smp->data, 0, sizeof(smp->data)); - if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) + if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP)) smp->status |= IB_SMP_UNSUP_METHOD; else for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2) @@ -1078,7 +1080,7 @@ static int subn_set_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev, u8 *p = (u8 *) smp->data; unsigned i; - if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) { + if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP)) { smp->status |= IB_SMP_UNSUP_METHOD; return reply(smp); } @@ -1195,20 +1197,20 @@ static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp, pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; goto bail; } - spin_lock_irqsave(&ibp->lock, flags); + spin_lock_irqsave(&ibp->rvp.lock, flags); p->tick = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PMA_TICKS); p->sample_status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); p->counter_width = 4; /* 32 bit counters */ p->counter_mask0_9 = COUNTER_MASK0_9; - p->sample_start = cpu_to_be32(ibp->pma_sample_start); - p->sample_interval = cpu_to_be32(ibp->pma_sample_interval); - p->tag = cpu_to_be16(ibp->pma_tag); - p->counter_select[0] = ibp->pma_counter_select[0]; - p->counter_select[1] = ibp->pma_counter_select[1]; - p->counter_select[2] = 
ibp->pma_counter_select[2]; - p->counter_select[3] = ibp->pma_counter_select[3]; - p->counter_select[4] = ibp->pma_counter_select[4]; - spin_unlock_irqrestore(&ibp->lock, flags); + p->sample_start = cpu_to_be32(ibp->rvp.pma_sample_start); + p->sample_interval = cpu_to_be32(ibp->rvp.pma_sample_interval); + p->tag = cpu_to_be16(ibp->rvp.pma_tag); + p->counter_select[0] = ibp->rvp.pma_counter_select[0]; + p->counter_select[1] = ibp->rvp.pma_counter_select[1]; + p->counter_select[2] = ibp->rvp.pma_counter_select[2]; + p->counter_select[3] = ibp->rvp.pma_counter_select[3]; + p->counter_select[4] = ibp->rvp.pma_counter_select[4]; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); bail: return reply((struct ib_smp *) pmp); @@ -1233,7 +1235,7 @@ static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp, goto bail; } - spin_lock_irqsave(&ibp->lock, flags); + spin_lock_irqsave(&ibp->rvp.lock, flags); /* Port Sampling code owns the PS* HW counters */ xmit_flags = ppd->cong_stats.flags; @@ -1242,18 +1244,18 @@ static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp, if (status == IB_PMA_SAMPLE_STATUS_DONE || (status == IB_PMA_SAMPLE_STATUS_RUNNING && xmit_flags == IB_PMA_CONG_HW_CONTROL_TIMER)) { - ibp->pma_sample_start = be32_to_cpu(p->sample_start); - ibp->pma_sample_interval = be32_to_cpu(p->sample_interval); - ibp->pma_tag = be16_to_cpu(p->tag); - ibp->pma_counter_select[0] = p->counter_select[0]; - ibp->pma_counter_select[1] = p->counter_select[1]; - ibp->pma_counter_select[2] = p->counter_select[2]; - ibp->pma_counter_select[3] = p->counter_select[3]; - ibp->pma_counter_select[4] = p->counter_select[4]; - dd->f_set_cntr_sample(ppd, ibp->pma_sample_interval, - ibp->pma_sample_start); + ibp->rvp.pma_sample_start = be32_to_cpu(p->sample_start); + ibp->rvp.pma_sample_interval = be32_to_cpu(p->sample_interval); + ibp->rvp.pma_tag = be16_to_cpu(p->tag); + ibp->rvp.pma_counter_select[0] = p->counter_select[0]; + ibp->rvp.pma_counter_select[1] = p->counter_select[1]; + ibp->rvp.pma_counter_select[2] = p->counter_select[2]; + ibp->rvp.pma_counter_select[3] = p->counter_select[3]; + ibp->rvp.pma_counter_select[4] = p->counter_select[4]; + dd->f_set_cntr_sample(ppd, ibp->rvp.pma_sample_interval, + ibp->rvp.pma_sample_start); } - spin_unlock_irqrestore(&ibp->lock, flags); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); ret = pma_get_portsamplescontrol(pmp, ibdev, port); @@ -1357,8 +1359,8 @@ static int pma_get_portsamplesresult(struct ib_pma_mad *pmp, int i; memset(pmp->data, 0, sizeof(pmp->data)); - spin_lock_irqsave(&ibp->lock, flags); - p->tag = cpu_to_be16(ibp->pma_tag); + spin_lock_irqsave(&ibp->rvp.lock, flags); + p->tag = cpu_to_be16(ibp->rvp.pma_tag); if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER) p->sample_status = IB_PMA_SAMPLE_STATUS_DONE; else { @@ -1373,11 +1375,11 @@ static int pma_get_portsamplesresult(struct ib_pma_mad *pmp, ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER; } } - for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++) + for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++) p->counter[i] = cpu_to_be32( get_cache_hw_sample_counters( - ppd, ibp->pma_counter_select[i])); - spin_unlock_irqrestore(&ibp->lock, flags); + ppd, ibp->rvp.pma_counter_select[i])); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); return reply((struct ib_smp *) pmp); } @@ -1397,8 +1399,8 @@ static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, /* Port Sampling code owns the PS* HW counters */ memset(pmp->data, 0, sizeof(pmp->data)); - 
spin_lock_irqsave(&ibp->lock, flags); - p->tag = cpu_to_be16(ibp->pma_tag); + spin_lock_irqsave(&ibp->rvp.lock, flags); + p->tag = cpu_to_be16(ibp->rvp.pma_tag); if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER) p->sample_status = IB_PMA_SAMPLE_STATUS_DONE; else { @@ -1415,11 +1417,11 @@ static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER; } } - for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++) + for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++) p->counter[i] = cpu_to_be64( get_cache_hw_sample_counters( - ppd, ibp->pma_counter_select[i])); - spin_unlock_irqrestore(&ibp->lock, flags); + ppd, ibp->rvp.pma_counter_select[i])); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); return reply((struct ib_smp *) pmp); } @@ -1453,7 +1455,7 @@ static int pma_get_portcounters(struct ib_pma_mad *pmp, cntrs.excessive_buffer_overrun_errors -= ibp->z_excessive_buffer_overrun_errors; cntrs.vl15_dropped -= ibp->z_vl15_dropped; - cntrs.vl15_dropped += ibp->n_vl15_dropped; + cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped; memset(pmp->data, 0, sizeof(pmp->data)); @@ -1546,9 +1548,9 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; qib_get_counters(ppd, &cntrs); - spin_lock_irqsave(&ppd->ibport_data.lock, flags); + spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags); xmit_wait_counter = xmit_wait_get_value_delta(ppd); - spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags); /* Adjust counters for any resets done. */ cntrs.symbol_error_counter -= ibp->z_symbol_error_counter; @@ -1564,7 +1566,7 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, cntrs.excessive_buffer_overrun_errors -= ibp->z_excessive_buffer_overrun_errors; cntrs.vl15_dropped -= ibp->z_vl15_dropped; - cntrs.vl15_dropped += ibp->n_vl15_dropped; + cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped; cntrs.port_xmit_data -= ibp->z_port_xmit_data; cntrs.port_rcv_data -= ibp->z_port_rcv_data; cntrs.port_xmit_packets -= ibp->z_port_xmit_packets; @@ -1743,7 +1745,7 @@ static int pma_set_portcounters(struct ib_pma_mad *pmp, cntrs.excessive_buffer_overrun_errors; if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) { - ibp->n_vl15_dropped = 0; + ibp->rvp.n_vl15_dropped = 0; ibp->z_vl15_dropped = cntrs.vl15_dropped; } @@ -1778,11 +1780,11 @@ static int pma_set_portcounters_cong(struct ib_pma_mad *pmp, ret = pma_get_portcounters_cong(pmp, ibdev, port); if (counter_select & IB_PMA_SEL_CONG_XMIT) { - spin_lock_irqsave(&ppd->ibport_data.lock, flags); + spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags); ppd->cong_stats.counter = 0; dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0); - spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags); } if (counter_select & IB_PMA_SEL_CONG_PORT_DATA) { ibp->z_port_xmit_data = cntrs.port_xmit_data; @@ -1806,7 +1808,7 @@ static int pma_set_portcounters_cong(struct ib_pma_mad *pmp, cntrs.local_link_integrity_errors; ibp->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; - ibp->n_vl15_dropped = 0; + ibp->rvp.n_vl15_dropped = 0; ibp->z_vl15_dropped = cntrs.vl15_dropped; } @@ -1916,12 +1918,12 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, ret = subn_get_vl_arb(smp, ibdev, port); goto bail; case IB_SMP_ATTR_SM_INFO: - if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) { + if (ibp->rvp.port_cap_flags & 
IB_PORT_SM_DISABLED) { ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; goto bail; } - if (ibp->port_cap_flags & IB_PORT_SM) { + if (ibp->rvp.port_cap_flags & IB_PORT_SM) { ret = IB_MAD_RESULT_SUCCESS; goto bail; } @@ -1950,12 +1952,12 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, ret = subn_set_vl_arb(smp, ibdev, port); goto bail; case IB_SMP_ATTR_SM_INFO: - if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) { + if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) { ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; goto bail; } - if (ibp->port_cap_flags & IB_PORT_SM) { + if (ibp->rvp.port_cap_flags & IB_PORT_SM) { ret = IB_MAD_RESULT_SUCCESS; goto bail; } @@ -2456,7 +2458,7 @@ static void xmit_wait_timer_func(unsigned long opaque) unsigned long flags; u8 status; - spin_lock_irqsave(&ppd->ibport_data.lock, flags); + spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags); if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_SAMPLE) { status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); if (status == IB_PMA_SAMPLE_STATUS_DONE) { @@ -2469,7 +2471,7 @@ static void xmit_wait_timer_func(unsigned long opaque) ppd->cong_stats.counter = xmit_wait_get_value_delta(ppd); dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0); done: - spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags); mod_timer(&ppd->cong_stats.timer, jiffies + HZ); } @@ -2501,7 +2503,7 @@ int qib_create_agents(struct qib_ibdev *dev) dd->pport[p].cong_stats.timer.expires = 0; add_timer(&dd->pport[p].cong_stats.timer); - ibp->send_agent = agent; + ibp->rvp.send_agent = agent; } return 0; @@ -2509,9 +2511,9 @@ int qib_create_agents(struct qib_ibdev *dev) err: for (p = 0; p < dd->num_pports; p++) { ibp = &dd->pport[p].ibport_data; - if (ibp->send_agent) { - agent = ibp->send_agent; - ibp->send_agent = NULL; + if (ibp->rvp.send_agent) { + agent = ibp->rvp.send_agent; + ibp->rvp.send_agent = NULL; ib_unregister_mad_agent(agent); } } @@ -2528,9 +2530,9 @@ void qib_free_agents(struct qib_ibdev *dev) for (p = 0; p < dd->num_pports; p++) { ibp = &dd->pport[p].ibport_data; - if (ibp->send_agent) { - agent = ibp->send_agent; - ibp->send_agent = NULL; + if (ibp->rvp.send_agent) { + agent = ibp->rvp.send_agent; + ibp->rvp.send_agent = NULL; ib_unregister_mad_agent(agent); } if (ibp->sm_ah) { diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 1f63835..9cb9be7 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -231,9 +231,9 @@ static void insert_qp(struct qib_ibdev *dev, struct rvt_qp *qp) spin_lock_irqsave(&dev->qpt_lock, flags); if (qp->ibqp.qp_num == 0) - rcu_assign_pointer(ibp->qp0, qp); + rcu_assign_pointer(ibp->rvp.qp[0], qp); else if (qp->ibqp.qp_num == 1) - rcu_assign_pointer(ibp->qp1, qp); + rcu_assign_pointer(ibp->rvp.qp[1], qp); else { qp->next = dev->qp_table[n]; rcu_assign_pointer(dev->qp_table[n], qp); @@ -255,12 +255,12 @@ static void remove_qp(struct qib_ibdev *dev, struct rvt_qp *qp) spin_lock_irqsave(&dev->qpt_lock, flags); - if (rcu_dereference_protected(ibp->qp0, + if (rcu_dereference_protected(ibp->rvp.qp[0], + lockdep_is_held(&dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); + } else if (rcu_dereference_protected(ibp->rvp.qp[1], lockdep_is_held(&dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->qp0, NULL); - } else if (rcu_dereference_protected(ibp->qp1, - lockdep_is_held(&dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->qp1, NULL); + 
RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } else { struct rvt_qp *q; struct rvt_qp __rcu **qpp; @@ -306,9 +306,9 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) if (!qib_mcast_tree_empty(ibp)) qp_inuse++; rcu_read_lock(); - if (rcu_dereference(ibp->qp0)) + if (rcu_dereference(ibp->rvp.qp[0])) qp_inuse++; - if (rcu_dereference(ibp->qp1)) + if (rcu_dereference(ibp->rvp.qp[1])) qp_inuse++; rcu_read_unlock(); } @@ -344,9 +344,9 @@ struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) rcu_read_lock(); if (unlikely(qpn <= 1)) { if (qpn == 0) - qp = rcu_dereference(ibp->qp0); + qp = rcu_dereference(ibp->rvp.qp[0]); else - qp = rcu_dereference(ibp->qp1); + qp = rcu_dereference(ibp->rvp.qp[1]); if (qp) atomic_inc(&qp->refcount); } else { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 46e6c97..1e8463d 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -760,7 +760,7 @@ void qib_send_rc_ack(struct rvt_qp *qp) queue_ack: if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { - ibp->n_rc_qacks++; + this_cpu_inc(*ibp->rvp.rc_qacks); qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; @@ -888,9 +888,9 @@ static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait) ibp = to_iport(qp->ibqp.device, qp->port_num); if (wqe->wr.opcode == IB_WR_RDMA_READ) - ibp->n_rc_resends++; + ibp->rvp.n_rc_resends++; else - ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; + ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN | @@ -913,7 +913,7 @@ static void rc_timeout(unsigned long arg) spin_lock(&qp->s_lock); if (qp->s_flags & QIB_S_TIMER) { ibp = to_iport(qp->ibqp.device, qp->port_num); - ibp->n_rc_timeouts++; + ibp->rvp.n_rc_timeouts++; qp->s_flags &= ~QIB_S_TIMER; del_timer(&qp->s_timer); qib_restart_rc(qp, qp->s_last_psn + 1, 1); @@ -1087,7 +1087,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, if (++qp->s_last >= qp->s_size) qp->s_last = 0; } else - ibp->n_rc_delayed_comp++; + this_cpu_inc(*ibp->rvp.rc_delayed_comp); qp->s_retry = qp->s_retry_cnt; update_last_psn(qp, wqe->lpsn); @@ -1232,7 +1232,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, switch (aeth >> 29) { case 0: /* ACK */ - ibp->n_rc_acks++; + this_cpu_inc(*ibp->rvp.rc_acks); if (qp->s_acked != qp->s_tail) { /* * We are expecting more ACKs so @@ -1261,7 +1261,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, goto bail; case 1: /* RNR NAK */ - ibp->n_rnr_naks++; + ibp->rvp.n_rnr_naks++; if (qp->s_acked == qp->s_tail) goto bail; if (qp->s_flags & QIB_S_WAIT_RNR) @@ -1276,7 +1276,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, /* The last valid PSN is the previous PSN. */ update_last_psn(qp, psn - 1); - ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; + ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; reset_psn(qp, psn); @@ -1297,7 +1297,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, switch ((aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ - ibp->n_seq_naks++; + ibp->rvp.n_seq_naks++; /* * Back up to the responder's expected PSN. 
* Note that we might get a NAK in the middle of an @@ -1310,17 +1310,17 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, case 1: /* Invalid Request */ status = IB_WC_REM_INV_REQ_ERR; - ibp->n_other_naks++; + ibp->rvp.n_other_naks++; goto class_b; case 2: /* Remote Access Error */ status = IB_WC_REM_ACCESS_ERR; - ibp->n_other_naks++; + ibp->rvp.n_other_naks++; goto class_b; case 3: /* Remote Operation Error */ status = IB_WC_REM_OP_ERR; - ibp->n_other_naks++; + ibp->rvp.n_other_naks++; class_b: if (qp->s_last == qp->s_acked) { qib_send_complete(qp, wqe, status); @@ -1371,7 +1371,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, wqe = do_rc_completion(qp, wqe, ibp); } - ibp->n_rdma_seq++; + ibp->rvp.n_rdma_seq++; qp->r_flags |= QIB_R_RDMAR_SEQ; qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { @@ -1643,7 +1643,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, * Don't queue the NAK if we already sent one. */ if (!qp->r_nak_state) { - ibp->n_rc_seqnak++; + ibp->rvp.n_rc_seqnak++; qp->r_nak_state = IB_NAK_PSN_ERROR; /* Use the expected PSN. */ qp->r_ack_psn = qp->r_psn; @@ -1679,7 +1679,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, */ e = NULL; old_req = 1; - ibp->n_rc_dupreq++; + ibp->rvp.n_rc_dupreq++; spin_lock_irqsave(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 682447e..6290979 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -279,7 +279,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH)) goto err; guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid)) + if (!gid_ok(&hdr->u.l.grh.dgid, + ibp->rvp.gid_prefix, guid)) goto err; if (!gid_ok(&hdr->u.l.grh.sgid, qp->alt_ah_attr.grh.dgid.global.subnet_prefix, @@ -311,7 +312,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, goto err; guid = get_sguid(ibp, qp->remote_ah_attr.grh.sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid)) + if (!gid_ok(&hdr->u.l.grh.dgid, + ibp->rvp.gid_prefix, guid)) goto err; if (!gid_ok(&hdr->u.l.grh.sgid, qp->remote_ah_attr.grh.dgid.global.subnet_prefix, @@ -409,7 +411,7 @@ again: if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) || qp->ibqp.qp_type != sqp->ibqp.qp_type) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; /* * For RC, the requester would timeout and retry so * shortcut the timeouts and just signal too many retries. @@ -566,7 +568,7 @@ again: send_comp: spin_lock_irqsave(&sqp->s_lock, flags); - ibp->n_loop_pkts++; + ibp->rvp.n_loop_pkts++; flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; qib_send_complete(sqp, wqe, send_status); @@ -576,7 +578,7 @@ rnr_nak: /* Handle RNR NAK */ if (qp->ibqp.qp_type == IB_QPT_UC) goto send_comp; - ibp->n_rnr_naks++; + ibp->rvp.n_rnr_naks++; /* * Note: we don't need the s_lock held since the BUSY flag * makes this single threaded. @@ -663,7 +665,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, hdr->next_hdr = IB_GRH_NEXT_HDR; hdr->hop_limit = grh->hop_limit; /* The SGID is 32-bit aligned. */ - hdr->sgid.global.subnet_prefix = ibp->gid_prefix; + hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; hdr->sgid.global.interface_id = grh->sgid_index ? 
ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid; hdr->dgid = grh->dgid; diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 1395ed0..9d1104e 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -702,7 +702,7 @@ busy: struct qib_ibport *ibp; ibp = &ppd->ibport_data; - ibp->n_dmawait++; + ibp->rvp.n_dmawait++; qp->s_flags |= QIB_S_WAIT_DMA_DESC; list_add_tail(&priv->iowait, &dev->dmawait); } diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 72a160e..fe4cf5e 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -406,7 +406,13 @@ static struct kobj_type qib_sl2vl_ktype = { #define QIB_DIAGC_ATTR(N) \ static struct qib_diagc_attr qib_diagc_attr_##N = { \ .attr = { .name = __stringify(N), .mode = 0664 }, \ - .counter = offsetof(struct qib_ibport, n_##N) \ + .counter = offsetof(struct qib_ibport, rvp.n_##N) \ + } + +#define QIB_DIAGC_ATTR_PER_CPU(N) \ + static struct qib_diagc_attr qib_diagc_attr_##N = { \ + .attr = { .name = __stringify(N), .mode = 0664 }, \ + .counter = offsetof(struct qib_ibport, rvp.z_##N) \ } struct qib_diagc_attr { @@ -414,10 +420,11 @@ struct qib_diagc_attr { size_t counter; }; +QIB_DIAGC_ATTR_PER_CPU(rc_acks); +QIB_DIAGC_ATTR_PER_CPU(rc_qacks); +QIB_DIAGC_ATTR_PER_CPU(rc_delayed_comp); + QIB_DIAGC_ATTR(rc_resends); -QIB_DIAGC_ATTR(rc_acks); -QIB_DIAGC_ATTR(rc_qacks); -QIB_DIAGC_ATTR(rc_delayed_comp); QIB_DIAGC_ATTR(seq_naks); QIB_DIAGC_ATTR(rdma_seq); QIB_DIAGC_ATTR(rnr_naks); @@ -449,6 +456,35 @@ static struct attribute *diagc_default_attributes[] = { NULL }; +static u64 get_all_cpu_total(u64 __percpu *cntr) +{ + int cpu; + u64 counter = 0; + + for_each_possible_cpu(cpu) + counter += *per_cpu_ptr(cntr, cpu); + return counter; +} + +#define def_write_per_cpu(cntr) \ +static void write_per_cpu_##cntr(struct qib_pportdata *ppd, u32 data) \ +{ \ + struct qib_devdata *dd = ppd->dd; \ + struct qib_ibport *qibp = &ppd->ibport_data; \ + /* A write can only zero the counter */ \ + if (data == 0) \ + qibp->rvp.z_##cntr = get_all_cpu_total(qibp->rvp.cntr); \ + else \ + qib_dev_err(dd, "Per CPU cntrs can only be zeroed"); \ +} + +def_write_per_cpu(rc_acks) +def_write_per_cpu(rc_qacks) +def_write_per_cpu(rc_delayed_comp) + +#define READ_PER_CPU_CNTR(cntr) (get_all_cpu_total(qibp->rvp.cntr) - \ + qibp->rvp.z_##cntr) + static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -458,7 +494,16 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter)); + if (!strncmp(dattr->attr.name, "rc_acks", 7)) + return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks)); + else if (!strncmp(dattr->attr.name, "rc_qacks", 8)) + return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks)); + else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15)) + return sprintf(buf, "%llu\n", + READ_PER_CPU_CNTR(rc_delayed_comp)); + else + return sprintf(buf, "%u\n", + *(u32 *)((char *)qibp + dattr->counter)); } static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, @@ -475,7 +520,15 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, ret = kstrtou32(buf, 0, &val); if (ret) return ret; - *(u32 *)((char *) qibp + dattr->counter) = val; + + if 
(!strncmp(dattr->attr.name, "rc_acks", 7)) + write_per_cpu_rc_acks(ppd, val); + else if (!strncmp(dattr->attr.name, "rc_qacks", 8)) + write_per_cpu_rc_qacks(ppd, val); + else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15)) + write_per_cpu_rc_delayed_comp(ppd, val); + else + *(u32 *)((char *)qibp + dattr->counter) = val; return size; } diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 1ae135a..659ac51 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -527,7 +527,7 @@ rewind: set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; drop: - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; return; op_err: diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 6dc20ca..d84872d 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -62,7 +62,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); if (!qp) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; return; } @@ -73,7 +73,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (dqptype != sqptype || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; goto drop; } @@ -153,14 +153,14 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } if (!ret) { if (qp->ibqp.qp_num == 0) - ibp->n_vl15_dropped++; + ibp->rvp.n_vl15_dropped++; goto bail_unlock; } } /* Silently drop packets which are too big. */ if (unlikely(wc.byte_len > qp->r_len)) { qp->r_flags |= QIB_R_REUSE_SGE; - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; goto bail_unlock; } @@ -219,7 +219,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); - ibp->n_loop_pkts++; + ibp->rvp.n_loop_pkts++; bail_unlock: spin_unlock_irqrestore(&qp->r_lock, flags); drop: @@ -546,7 +546,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } if (!ret) { if (qp->ibqp.qp_num == 0) - ibp->n_vl15_dropped++; + ibp->rvp.n_vl15_dropped++; return; } } @@ -589,5 +589,5 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, return; drop: - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 89fe514..276e4dcc 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -586,7 +586,7 @@ static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, /* Check for valid receive state. */ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; goto unlock; } @@ -716,7 +716,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) return; drop: - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; } /* @@ -1256,7 +1256,7 @@ err_tx: qib_put_txreq(tx); ret = wait_kmem(dev, qp); unaligned: - ibp->n_unaligned++; + ibp->rvp.n_unaligned++; bail: return ret; bail_tx: @@ -1647,16 +1647,16 @@ static int qib_query_port(struct ib_device *ibdev, u8 port, memset(props, 0, sizeof(*props)); props->lid = lid ? 
lid : be16_to_cpu(IB_LID_PERMISSIVE); props->lmc = ppd->lmc; - props->sm_lid = ibp->sm_lid; - props->sm_sl = ibp->sm_sl; + props->sm_lid = ibp->rvp.sm_lid; + props->sm_sl = ibp->rvp.sm_sl; props->state = dd->f_iblink_state(ppd->lastibcstat); props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat); - props->port_cap_flags = ibp->port_cap_flags; + props->port_cap_flags = ibp->rvp.port_cap_flags; props->gid_tbl_len = QIB_GUIDS_PER_PORT; props->max_msg_sz = 0x80000000; props->pkey_tbl_len = qib_get_npkeys(dd); - props->bad_pkey_cntr = ibp->pkey_violations; - props->qkey_viol_cntr = ibp->qkey_violations; + props->bad_pkey_cntr = ibp->rvp.pkey_violations; + props->qkey_viol_cntr = ibp->rvp.qkey_violations; props->active_width = ppd->link_width_active; /* See rate_show() */ props->active_speed = ppd->link_speed_active; @@ -1684,7 +1684,7 @@ static int qib_query_port(struct ib_device *ibdev, u8 port, mtu = IB_MTU_2048; } props->active_mtu = mtu; - props->subnet_timeout = ibp->subnet_timeout; + props->subnet_timeout = ibp->rvp.subnet_timeout; return 0; } @@ -1734,14 +1734,14 @@ static int qib_modify_port(struct ib_device *ibdev, u8 port, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - ibp->port_cap_flags |= props->set_port_cap_mask; - ibp->port_cap_flags &= ~props->clr_port_cap_mask; + ibp->rvp.port_cap_flags |= props->set_port_cap_mask; + ibp->rvp.port_cap_flags &= ~props->clr_port_cap_mask; if (props->set_port_cap_mask || props->clr_port_cap_mask) qib_cap_mask_chg(ibp); if (port_modify_mask & IB_PORT_SHUTDOWN) qib_set_linkstate(ppd, QIB_IB_LINKDOWN); if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) - ibp->qkey_violations = 0; + ibp->rvp.qkey_violations = 0; return 0; } @@ -1757,7 +1757,7 @@ static int qib_query_gid(struct ib_device *ibdev, u8 port, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - gid->global.subnet_prefix = ibp->gid_prefix; + gid->global.subnet_prefix = ibp->rvp.gid_prefix; if (index == 0) gid->global.interface_id = ppd->guid; else if (index < QIB_GUIDS_PER_PORT) @@ -1787,7 +1787,7 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) attr.dlid = dlid; attr.port_num = ppd_from_ibp(ibp)->port; rcu_read_lock(); - qp0 = rcu_dereference(ibp->qp0); + qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) ah = ib_create_ah(qp0->ibqp.pd, &attr); rcu_read_unlock(); @@ -1876,22 +1876,22 @@ static void init_ibport(struct qib_pportdata *ppd) struct qib_verbs_counters cntrs; struct qib_ibport *ibp = &ppd->ibport_data; - spin_lock_init(&ibp->lock); + spin_lock_init(&ibp->rvp.lock); /* Set the prefix to the default value (see ch. 
4.1.1) */ - ibp->gid_prefix = IB_DEFAULT_GID_PREFIX; - ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); - ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP | + ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; + ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); + ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP | IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP | IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP | IB_PORT_OTHER_LOCAL_CHANGES_SUP; if (ppd->dd->flags & QIB_HAS_LINK_LATENCY) - ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP; - ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; - ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; - ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; - ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; - ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; + ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP; + ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; + ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; + ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; + ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; + ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; /* Snapshot current HW counters to "clear" them. */ qib_get_counters(ppd, &cntrs); @@ -1911,8 +1911,8 @@ static void init_ibport(struct qib_pportdata *ppd) ibp->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; ibp->z_vl15_dropped = cntrs.vl15_dropped; - RCU_INIT_POINTER(ibp->qp0, NULL); - RCU_INIT_POINTER(ibp->qp1, NULL); + RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); + RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } static int qib_port_immutable(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 00dd2ad..538d3a6 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -401,21 +401,10 @@ struct qib_pma_counters { }; struct qib_ibport { - struct rvt_qp __rcu *qp0; - struct rvt_qp __rcu *qp1; - struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct rvt_ibport rvp; struct rvt_ah *sm_ah; struct rvt_ah *smi_ah; - struct rb_root mcast_tree; - spinlock_t lock; /* protect changes in this struct */ - - /* non-zero when timer is set */ - unsigned long mkey_lease_timeout; - unsigned long trap_timeout; - __be64 gid_prefix; /* in network order */ - __be64 mkey; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ - u64 tid; /* TID for traps */ struct qib_pma_counters __percpu *pmastats; u64 z_unicast_xmit; /* starting count for PMA */ u64 z_unicast_rcv; /* starting count for PMA */ @@ -434,42 +423,9 @@ struct qib_ibport { u32 z_local_link_integrity_errors; /* starting count for PMA */ u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ u32 z_vl15_dropped; /* starting count for PMA */ - u32 n_rc_resends; - u32 n_rc_acks; - u32 n_rc_qacks; - u32 n_rc_delayed_comp; - u32 n_seq_naks; - u32 n_rdma_seq; - u32 n_rnr_naks; - u32 n_other_naks; - u32 n_loop_pkts; - u32 n_pkt_drops; - u32 n_vl15_dropped; - u32 n_rc_timeouts; - u32 n_dmawait; - u32 n_unaligned; - u32 n_rc_dupreq; - u32 n_rc_seqnak; - u32 port_cap_flags; - u32 pma_sample_start; - u32 pma_sample_interval; - __be16 pma_counter_select[5]; - u16 pma_tag; - u16 pkey_violations; - u16 qkey_violations; - u16 mkey_violations; - u16 mkey_lease_period; - u16 sm_lid; - u16 repress_traps; - u8 sm_sl; - u8 mkeyprot; - u8 subnet_timeout; - u8 vl_high_limit; u8 sl_to_vl[16]; - }; - struct qib_ibdev { struct rvt_dev_info 
rdi; struct list_head pending_mmaps; diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c index 1c7af03..c3d6535 100644 --- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -114,8 +114,8 @@ struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid) unsigned long flags; struct qib_mcast *mcast; - spin_lock_irqsave(&ibp->lock, flags); - n = ibp->mcast_tree.rb_node; + spin_lock_irqsave(&ibp->rvp.lock, flags); + n = ibp->rvp.mcast_tree.rb_node; while (n) { int ret; @@ -129,11 +129,11 @@ struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid) n = n->rb_right; else { atomic_inc(&mcast->refcount); - spin_unlock_irqrestore(&ibp->lock, flags); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); goto bail; } } - spin_unlock_irqrestore(&ibp->lock, flags); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); mcast = NULL; @@ -153,11 +153,11 @@ bail: static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp, struct qib_mcast *mcast, struct qib_mcast_qp *mqp) { - struct rb_node **n = &ibp->mcast_tree.rb_node; + struct rb_node **n = &ibp->rvp.mcast_tree.rb_node; struct rb_node *pn = NULL; int ret; - spin_lock_irq(&ibp->lock); + spin_lock_irq(&ibp->rvp.lock); while (*n) { struct qib_mcast *tmcast; @@ -212,12 +212,12 @@ static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp, atomic_inc(&mcast->refcount); rb_link_node(&mcast->rb_node, pn, n); - rb_insert_color(&mcast->rb_node, &ibp->mcast_tree); + rb_insert_color(&mcast->rb_node, &ibp->rvp.mcast_tree); ret = 0; bail: - spin_unlock_irq(&ibp->lock); + spin_unlock_irq(&ibp->rvp.lock); return ret; } @@ -294,13 +294,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) return -EINVAL; - spin_lock_irq(&ibp->lock); + spin_lock_irq(&ibp->rvp.lock); /* Find the GID in the mcast table. */ - n = ibp->mcast_tree.rb_node; + n = ibp->rvp.mcast_tree.rb_node; while (1) { if (n == NULL) { - spin_unlock_irq(&ibp->lock); + spin_unlock_irq(&ibp->rvp.lock); return -EINVAL; } @@ -329,13 +329,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) /* If this was the last attached QP, remove the GID too. */ if (list_empty(&mcast->qp_list)) { - rb_erase(&mcast->rb_node, &ibp->mcast_tree); + rb_erase(&mcast->rb_node, &ibp->rvp.mcast_tree); last = 1; } break; } - spin_unlock_irq(&ibp->lock); + spin_unlock_irq(&ibp->rvp.lock); /* QP not attached */ if (!delp) return -EINVAL; @@ -359,5 +359,5 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int qib_mcast_tree_empty(struct qib_ibport *ibp) { - return ibp->mcast_tree.rb_node == NULL; + return !(ibp->rvp.mcast_tree.rb_node); } -- cgit v0.10.2 From 5418a5abc96f908d31bfecee143fbf330ded60c1 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:08 -0800 Subject: IB/qib: Implement qib support for AH notification Additional work is required to create an AH. This patch adds support to set the VL correctly. 
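For context: rdmavt calls this hook from its generic AH creation path before the rest of the rvt_ah has been filled in, which is why the callback added below only writes fields (vl, log_pmtu) and never reads them. A rough sketch of the caller side follows; it is a simplified approximation, not the verbatim rdmavt source, and allocation-limit and locking details are omitted.

/*
 * Simplified sketch of the rdmavt-side caller (approximation only).
 * The driver hook runs while most of *ah is still uninitialized.
 */
struct ib_ah *rvt_create_ah_sketch(struct ib_pd *pd,
                                   struct ib_ah_attr *ah_attr)
{
        struct rvt_dev_info *rdi = ib_to_rvt(pd->device);
        struct rvt_ah *ah;

        if (rdi->driver_f.check_ah(pd->device, ah_attr))
                return ERR_PTR(-EINVAL);

        ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);

        ah->attr = *ah_attr;
        /* Driver hook runs here, before setup of *ah is complete. */
        if (rdi->driver_f.notify_new_ah)
                rdi->driver_f.notify_new_ah(pd->device, ah_attr, ah);

        return &ah->ibah;
}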
Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 276e4dcc..96e34f4 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1777,6 +1777,24 @@ int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) return 0; } +static void qib_notify_new_ah(struct ib_device *ibdev, + struct ib_ah_attr *ah_attr, + struct rvt_ah *ah) +{ + struct qib_ibport *ibp; + struct qib_pportdata *ppd; + + /* + * Do not trust reading anything from rvt_ah at this point as it is not + * done being setup. We can however modify things which we need to set. + */ + + ibp = to_iport(ibdev, ah_attr->port_num); + ppd = ppd_from_ibp(ibp); + ah->vl = ibp->sl_to_vl[ah->attr.sl]; + ah->log_pmtu = ilog2(ppd->ibmtu); +} + struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) { struct ib_ah_attr attr; @@ -2111,6 +2129,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; + dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; dd->verbs_dev.rdi.dparms.props.max_pd = ib_qib_max_pds; dd->verbs_dev.rdi.dparms.props.max_ah = ib_qib_max_ahs; dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | -- cgit v0.10.2 From cd18201f5ec8b04a8eb9ef3f3b559cba55955598 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:14 -0800 Subject: IB/qib: Remove mmap from qib Since mmap functionality has been moved into rdmavt, its time for qib to use that. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index 75140f5..45db4fc 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o ib_qib-y := qib_cq.o qib_diag.o qib_driver.o qib_eeprom.o \ qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \ - qib_mad.o qib_mmap.o qib_pcie.o qib_pio_copy.o \ + qib_mad.o qib_pcie.o qib_pio_copy.o \ qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \ qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \ diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index c1ea21e..094f694 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -264,7 +264,7 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, if (udata && udata->outlen >= sizeof(__u64)) { int err; - cq->ip = qib_create_mmap_info(dev, sz, context, wc); + cq->ip = rvt_create_mmap_info(&dev->rdi, sz, context, wc); if (!cq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wc; @@ -290,9 +290,9 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, spin_unlock(&dev->n_cqs_lock); if (cq->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); + list_add(&cq->ip->pending_mmaps, &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } /* @@ -342,7 +342,7 @@ int qib_destroy_cq(struct ib_cq *ibcq) dev->n_cqs_allocated--; spin_unlock(&dev->n_cqs_lock); if (cq->ip) - kref_put(&cq->ip->ref, 
qib_release_mmap_info); + kref_put(&cq->ip->ref, rvt_release_mmap_info); else vfree(cq->queue); kfree(cq); @@ -468,7 +468,7 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) struct qib_ibdev *dev = to_idev(ibcq->device); struct rvt_mmap_info *ip = cq->ip; - qib_update_mmap_info(dev, ip, sz, wc); + rvt_update_mmap_info(&dev->rdi, ip, sz, wc); /* * Return the offset to mmap. @@ -481,10 +481,10 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) goto bail; } - spin_lock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + list_add(&ip->pending_mmaps, &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c deleted file mode 100644 index c32078c..0000000 --- a/drivers/infiniband/hw/qib/qib_mmap.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include "qib_verbs.h" - -/** - * qib_release_mmap_info - free mmap info structure - * @ref: a pointer to the kref within struct rvt_mmap_info - */ -void qib_release_mmap_info(struct kref *ref) -{ - struct rvt_mmap_info *ip = - container_of(ref, struct rvt_mmap_info, ref); - struct qib_ibdev *dev = to_idev(ip->context->device); - - spin_lock_irq(&dev->pending_lock); - list_del(&ip->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - - vfree(ip->obj); - kfree(ip); -} - -/* - * open and close keep track of how many times the CQ is mapped, - * to avoid releasing it. 
- */ -static void qib_vma_open(struct vm_area_struct *vma) -{ - struct rvt_mmap_info *ip = vma->vm_private_data; - - kref_get(&ip->ref); -} - -static void qib_vma_close(struct vm_area_struct *vma) -{ - struct rvt_mmap_info *ip = vma->vm_private_data; - - kref_put(&ip->ref, qib_release_mmap_info); -} - -static const struct vm_operations_struct qib_vm_ops = { - .open = qib_vma_open, - .close = qib_vma_close, -}; - -/** - * qib_mmap - create a new mmap region - * @context: the IB user context of the process making the mmap() call - * @vma: the VMA to be initialized - * Return zero if the mmap is OK. Otherwise, return an errno. - */ -int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - struct qib_ibdev *dev = to_idev(context->device); - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long size = vma->vm_end - vma->vm_start; - struct rvt_mmap_info *ip, *pp; - int ret = -EINVAL; - - /* - * Search the device's list of objects waiting for a mmap call. - * Normally, this list is very short since a call to create a - * CQ, QP, or SRQ is soon followed by a call to mmap(). - */ - spin_lock_irq(&dev->pending_lock); - list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, - pending_mmaps) { - /* Only the creator is allowed to mmap the object */ - if (context != ip->context || (__u64) offset != ip->offset) - continue; - /* Don't allow a mmap larger than the object. */ - if (size > ip->size) - break; - - list_del_init(&ip->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - - ret = remap_vmalloc_range(vma, ip->obj, 0); - if (ret) - goto done; - vma->vm_ops = &qib_vm_ops; - vma->vm_private_data = ip; - qib_vma_open(vma); - goto done; - } - spin_unlock_irq(&dev->pending_lock); -done: - return ret; -} - -/* - * Allocate information for qib_mmap - */ -struct rvt_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, - u32 size, - struct ib_ucontext *context, - void *obj) { - struct rvt_mmap_info *ip; - - ip = kmalloc(sizeof(*ip), GFP_KERNEL); - if (!ip) - goto bail; - - size = PAGE_ALIGN(size); - - spin_lock_irq(&dev->mmap_offset_lock); - if (dev->mmap_offset == 0) - dev->mmap_offset = PAGE_SIZE; - ip->offset = dev->mmap_offset; - dev->mmap_offset += size; - spin_unlock_irq(&dev->mmap_offset_lock); - - INIT_LIST_HEAD(&ip->pending_mmaps); - ip->size = size; - ip->context = context; - ip->obj = obj; - kref_init(&ip->ref); - -bail: - return ip; -} - -void qib_update_mmap_info(struct qib_ibdev *dev, struct rvt_mmap_info *ip, - u32 size, void *obj) -{ - size = PAGE_ALIGN(size); - - spin_lock_irq(&dev->mmap_offset_lock); - if (dev->mmap_offset == 0) - dev->mmap_offset = PAGE_SIZE; - ip->offset = dev->mmap_offset; - dev->mmap_offset += size; - spin_unlock_irq(&dev->mmap_offset_lock); - - ip->size = size; - ip->obj = obj; -} diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 9cb9be7..b7034d4 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -494,12 +494,12 @@ int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err) if (qp->s_flags & QIB_S_ANY_WAIT_SEND) qp->s_flags &= ~QIB_S_ANY_WAIT_SEND; - spin_lock(&dev->pending_lock); + spin_lock(&dev->rdi.pending_lock); if (!list_empty(&priv->iowait) && !(qp->s_flags & QIB_S_BUSY)) { qp->s_flags &= ~QIB_S_ANY_WAIT_IO; list_del_init(&priv->iowait); } - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); if (!(qp->s_flags & QIB_S_BUSY)) { qp->s_hdrwords = 0; @@ -702,10 +702,10 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr 
*attr, case IB_QPS_RESET: if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; - spin_lock(&dev->pending_lock); + spin_lock(&dev->rdi.pending_lock); if (!list_empty(&priv->iowait)) list_del_init(&priv->iowait); - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); @@ -1158,7 +1158,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, } else { u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; - qp->ip = qib_create_mmap_info(dev, s, + qp->ip = rvt_create_mmap_info(&dev->rdi, s, ibpd->uobject->context, qp->r_rq.wq); if (!qp->ip) { @@ -1186,9 +1186,9 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, spin_unlock(&dev->n_qps_lock); if (qp->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } ret = &qp->ibqp; @@ -1196,7 +1196,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, bail_ip: if (qp->ip) - kref_put(&qp->ip->ref, qib_release_mmap_info); + kref_put(&qp->ip->ref, rvt_release_mmap_info); else vfree(qp->r_rq.wq); free_qpn(&dev->qpn_table, qp->ibqp.qp_num); @@ -1230,10 +1230,10 @@ int qib_destroy_qp(struct ib_qp *ibqp) spin_lock_irq(&qp->s_lock); if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; - spin_lock(&dev->pending_lock); + spin_lock(&dev->rdi.pending_lock); if (!list_empty(&priv->iowait)) list_del_init(&priv->iowait); - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); spin_unlock_irq(&qp->s_lock); cancel_work_sync(&priv->s_work); @@ -1256,7 +1256,7 @@ int qib_destroy_qp(struct ib_qp *ibqp) spin_unlock(&dev->n_qps_lock); if (qp->ip) - kref_put(&qp->ip->ref, qib_release_mmap_info); + kref_put(&qp->ip->ref, rvt_release_mmap_info); else vfree(qp->r_rq.wq); vfree(qp->s_wq); diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 9d1104e..bb34bb9 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -697,7 +697,7 @@ busy: tx->dwords = dwords; priv->s_tx = tx; dev = &ppd->dd->verbs_dev; - spin_lock(&dev->pending_lock); + spin_lock(&dev->rdi.pending_lock); if (list_empty(&priv->iowait)) { struct qib_ibport *ibp; @@ -706,7 +706,7 @@ busy: qp->s_flags |= QIB_S_WAIT_DMA_DESC; list_add_tail(&priv->iowait, &dev->dmawait); } - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); qp->s_flags &= ~QIB_S_BUSY; spin_unlock(&qp->s_lock); ret = -EBUSY; diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c index e9dfa30..dff8808 100644 --- a/drivers/infiniband/hw/qib/qib_srq.c +++ b/drivers/infiniband/hw/qib/qib_srq.c @@ -148,7 +148,7 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; srq->ip = - qib_create_mmap_info(dev, s, ibpd->uobject->context, + rvt_create_mmap_info(&dev->rdi, s, ibpd->uobject->context, srq->rq.wq); if (!srq->ip) { ret = ERR_PTR(-ENOMEM); @@ -183,9 +183,9 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, spin_unlock(&dev->n_srqs_lock); if (srq->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); + list_add(&srq->ip->pending_mmaps, 
&dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } ret = &srq->ibsrq; @@ -307,7 +307,7 @@ int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct qib_ibdev *dev = to_idev(srq->ibsrq.device); u32 s = sizeof(struct rvt_rwq) + size * sz; - qib_update_mmap_info(dev, ip, s, wq); + rvt_update_mmap_info(&dev->rdi, ip, s, wq); /* * Return the offset to mmap. @@ -324,11 +324,11 @@ int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, * Put user mapping info onto the pending list * unless it already is on the list. */ - spin_lock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, - &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } } else if (attr_mask & IB_SRQ_LIMIT) { spin_lock_irq(&srq->rq.lock); @@ -371,7 +371,7 @@ int qib_destroy_srq(struct ib_srq *ibsrq) dev->n_srqs_allocated--; spin_unlock(&dev->n_srqs_lock); if (srq->ip) - kref_put(&srq->ip->ref, qib_release_mmap_info); + kref_put(&srq->ip->ref, rvt_release_mmap_info); else vfree(srq->rq.wq); kfree(srq); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 96e34f4..893d00c 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -731,7 +731,7 @@ static void mem_timer(unsigned long data) struct qib_qp_priv *priv = NULL; unsigned long flags; - spin_lock_irqsave(&dev->pending_lock, flags); + spin_lock_irqsave(&dev->rdi.pending_lock, flags); if (!list_empty(list)) { priv = list_entry(list->next, struct qib_qp_priv, iowait); qp = priv->owner; @@ -740,7 +740,7 @@ static void mem_timer(unsigned long data) if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); } - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); if (qp) { spin_lock_irqsave(&qp->s_lock, flags); @@ -955,13 +955,13 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, unsigned long flags; spin_lock_irqsave(&qp->s_lock, flags); - spin_lock(&dev->pending_lock); + spin_lock(&dev->rdi.pending_lock); if (!list_empty(&dev->txreq_free)) { struct list_head *l = dev->txreq_free.next; list_del(l); - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); spin_unlock_irqrestore(&qp->s_lock, flags); tx = list_entry(l, struct qib_verbs_txreq, txreq.list); } else { @@ -972,7 +972,7 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, list_add_tail(&priv->iowait, &dev->txwait); } qp->s_flags &= ~QIB_S_BUSY; - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); spin_unlock_irqrestore(&qp->s_lock, flags); tx = ERR_PTR(-EBUSY); } @@ -985,17 +985,17 @@ static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, struct qib_verbs_txreq *tx; unsigned long flags; - spin_lock_irqsave(&dev->pending_lock, flags); + spin_lock_irqsave(&dev->rdi.pending_lock, flags); /* assume the list non empty */ if (likely(!list_empty(&dev->txreq_free))) { struct list_head *l = dev->txreq_free.next; list_del(l); - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); tx = list_entry(l, struct qib_verbs_txreq, txreq.list); } else { /* call slow path to get the extra lock */ - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); tx = __get_txreq(dev, qp); } return tx; 
@@ -1025,7 +1025,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) kfree(tx->align_buf); } - spin_lock_irqsave(&dev->pending_lock, flags); + spin_lock_irqsave(&dev->rdi.pending_lock, flags); /* Put struct back on free list */ list_add(&tx->txreq.list, &dev->txreq_free); @@ -1037,7 +1037,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) qp = priv->owner; list_del_init(&priv->iowait); atomic_inc(&qp->refcount); - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_flags & QIB_S_WAIT_TX) { @@ -1049,7 +1049,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } else - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); } /* @@ -1068,7 +1068,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) n = 0; dev = &ppd->dd->verbs_dev; - spin_lock(&dev->pending_lock); + spin_lock(&dev->rdi.pending_lock); /* Search wait list for first QP wanting DMA descriptors. */ list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) { @@ -1086,7 +1086,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) qps[n++] = qp; } - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); for (i = 0; i < n; i++) { qp = qps[i]; @@ -1147,14 +1147,14 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { - spin_lock(&dev->pending_lock); + spin_lock(&dev->rdi.pending_lock); if (list_empty(&priv->iowait)) { if (list_empty(&dev->memwait)) mod_timer(&dev->mem_timer, jiffies + 1); qp->s_flags |= QIB_S_WAIT_KMEM; list_add_tail(&priv->iowait, &dev->memwait); } - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); qp->s_flags &= ~QIB_S_BUSY; ret = -EBUSY; } @@ -1284,7 +1284,7 @@ static int no_bufs_available(struct rvt_qp *qp) */ spin_lock_irqsave(&qp->s_lock, flags); if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { - spin_lock(&dev->pending_lock); + spin_lock(&dev->rdi.pending_lock); if (list_empty(&priv->iowait)) { dev->n_piowait++; qp->s_flags |= QIB_S_WAIT_PIO; @@ -1292,7 +1292,7 @@ static int no_bufs_available(struct rvt_qp *qp) dd = dd_from_dev(dev); dd->f_wantpiobuf_intr(dd, 1); } - spin_unlock(&dev->pending_lock); + spin_unlock(&dev->rdi.pending_lock); qp->s_flags &= ~QIB_S_BUSY; ret = -EBUSY; } @@ -1556,7 +1556,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd) * could end up with QPs on the wait list with the interrupt * disabled. 
*/ - spin_lock_irqsave(&dev->pending_lock, flags); + spin_lock_irqsave(&dev->rdi.pending_lock, flags); while (!list_empty(list)) { if (n == ARRAY_SIZE(qps)) goto full; @@ -1568,7 +1568,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd) } dd->f_wantpiobuf_intr(dd, 0); full: - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); for (i = 0; i < n; i++) { qp = qps[i]; @@ -1992,10 +1992,6 @@ int qib_register_ib_device(struct qib_devdata *dd) qib_init_qpn_table(dd, &dev->qpn_table); - INIT_LIST_HEAD(&dev->pending_mmaps); - spin_lock_init(&dev->pending_lock); - dev->mmap_offset = PAGE_SIZE; - spin_lock_init(&dev->mmap_offset_lock); INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); INIT_LIST_HEAD(&dev->txwait); @@ -2115,7 +2111,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->attach_mcast = qib_multicast_attach; ibdev->detach_mcast = qib_multicast_detach; ibdev->process_mad = qib_process_mad; - ibdev->mmap = qib_mmap; + ibdev->mmap = NULL; ibdev->dma_ops = NULL; ibdev->get_port_immutable = qib_port_immutable; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 538d3a6..eade668 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -428,9 +428,6 @@ struct qib_ibport { struct qib_ibdev { struct rvt_dev_info rdi; - struct list_head pending_mmaps; - spinlock_t mmap_offset_lock; /* protect mmap_offset */ - u32 mmap_offset; /* QP numbers are shared by all IB ports */ struct qib_qpn_table qpn_table; @@ -444,7 +441,6 @@ struct qib_ibdev { struct qib_pio_header *pio_hdrs; dma_addr_t pio_hdrs_phys; /* list of QPs waiting for RNR timer */ - spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */ u32 qp_table_size; /* size of the hash table */ u32 qp_rnd; /* random bytes for hash */ spinlock_t qpt_lock; @@ -683,17 +679,6 @@ static inline void qib_put_ss(struct rvt_sge_state *ss) } } -void qib_release_mmap_info(struct kref *ref); - -struct rvt_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, - struct ib_ucontext *context, - void *obj); - -void qib_update_mmap_info(struct qib_ibdev *dev, struct rvt_mmap_info *ip, - u32 size, void *obj); - -int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); - int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); void qib_migrate_qp(struct rvt_qp *qp); -- cgit v0.10.2 From 76fec3e08c1205c4b57bd934727cbbf7223b8dac Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:21 -0800 Subject: IB/qib: Use rdmavt pkey verbs function Remove qib query pkey function which is no longer needed as this is now being done in rdmavt. The allocation and maintenance of the list still resides in the driver. 
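For reference, the lookup that replaces the deleted qib_query_pkey() simply indexes the per-port pkey table the driver hands to rdmavt through rvt_init_port() (see the hunk added to qib_register_ib_device() below). A simplified sketch of that rdmavt-side path, with approximate helper and field names rather than the verbatim rdmavt source:

/*
 * Simplified sketch of rdmavt's query_pkey handling (field and helper
 * names approximated; not the exact rdmavt implementation).
 */
static int rvt_query_pkey_sketch(struct ib_device *ibdev, u8 port_num,
                                 u16 index, u16 *pkey)
{
        struct rvt_dev_info *rdi = ib_to_rvt(ibdev);

        if (port_num < 1 || port_num > rdi->dparms.nports)
                return -EINVAL;
        if (index >= rdi->dparms.npkeys)
                return -EINVAL;

        /* pkey_table is still allocated and maintained by the driver;
         * rdmavt only reads from the table registered at init time. */
        *pkey = rdi->ports[port_num - 1]->pkey_table[index];
        return 0;
}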
Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 893d00c..74cb501 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1841,24 +1841,6 @@ unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index) return ret; } -static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - struct qib_devdata *dd = dd_from_ibdev(ibdev); - int ret; - - if (index >= qib_get_npkeys(dd)) { - ret = -EINVAL; - goto bail; - } - - *pkey = qib_get_pkey(to_iport(ibdev, port), index); - ret = 0; - -bail: - return ret; -} - /** * qib_alloc_ucontext - allocate a ucontest * @ibdev: the infiniband device @@ -1961,7 +1943,7 @@ int qib_register_ib_device(struct qib_devdata *dd) struct qib_ibdev *dev = &dd->verbs_dev; struct ib_device *ibdev = &dev->rdi.ibdev; struct qib_pportdata *ppd = dd->pport; - unsigned i; + unsigned i, ctxt; int ret; dev->qp_table_size = ib_qib_qp_table_size; @@ -2073,7 +2055,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->modify_device = qib_modify_device; ibdev->query_port = qib_query_port; ibdev->modify_port = qib_modify_port; - ibdev->query_pkey = qib_query_pkey; + ibdev->query_pkey = NULL; ibdev->query_gid = qib_query_gid; ibdev->alloc_ucontext = qib_alloc_ucontext; ibdev->dealloc_ucontext = qib_dealloc_ucontext; @@ -2131,6 +2113,17 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | RVT_FLAG_CQ_INIT_DRIVER); dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; + dd->verbs_dev.rdi.dparms.nports = dd->num_pports; + dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd); + + ppd = dd->pport; + for (i = 0; i < dd->num_pports; i++, ppd++) { + ctxt = ppd->hw_pidx; + rvt_init_port(&dd->verbs_dev.rdi, + &ppd->ibport_data.rvp, + i, + dd->rcd[ctxt]->pkeys); + } ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) -- cgit v0.10.2 From 898fa52b4ac3bb3ba306e1aa94bc7fbc79bfd2bd Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:27 -0800 Subject: IB/qib: Remove qpn, qp tables and related variables from qib This patch removes the private queue pair structure and the table which holds the queue pair numbers in favor of using what is provided by rdmavt. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index b7034d4..f18ee76 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -42,25 +42,31 @@ #include "qib.h" -#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) +/* + * mask field which was present in now deleted qib_qpn_table + * is not present in rvt_qpn_table. Defining the same field + * as qpt_mask here instead of adding the mask field to + * rvt_qpn_table. 
+ */ +static u16 qpt_mask; -static inline unsigned mk_qpn(struct qib_qpn_table *qpt, - struct qpn_map *map, unsigned off) +static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, + struct rvt_qpn_map *map, unsigned off) { - return (map - qpt->map) * BITS_PER_PAGE + off; + return (map - qpt->map) * RVT_BITS_PER_PAGE + off; } -static inline unsigned find_next_offset(struct qib_qpn_table *qpt, - struct qpn_map *map, unsigned off, +static inline unsigned find_next_offset(struct rvt_qpn_table *qpt, + struct rvt_qpn_map *map, unsigned off, unsigned n) { - if (qpt->mask) { + if (qpt_mask) { off++; - if (((off & qpt->mask) >> 1) >= n) - off = (off | qpt->mask) + 2; - } else - off = find_next_zero_bit(map->page, BITS_PER_PAGE, off); + if (((off & qpt_mask) >> 1) >= n) + off = (off | qpt_mask) + 2; + } else { + off = find_next_zero_bit(map->page, RVT_BITS_PER_PAGE, off); + } return off; } @@ -101,7 +107,7 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map, +static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) { unsigned long page = get_zeroed_page(gfp); @@ -122,11 +128,11 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map, * Allocate the next available QPN or * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ -static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, +static int alloc_qpn(struct qib_devdata *dd, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; - struct qpn_map *map; + struct rvt_qpn_map *map; u32 ret; if (type == IB_QPT_SMI || type == IB_QPT_GSI) { @@ -144,12 +150,12 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, } qpn = qpt->last + 2; - if (qpn >= QPN_MAX) + if (qpn >= RVT_QPN_MAX) qpn = 2; - if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues) - qpn = (qpn | qpt->mask) + 2; - offset = qpn & BITS_PER_PAGE_MASK; - map = &qpt->map[qpn / BITS_PER_PAGE]; + if (qpt_mask && ((qpn & qpt_mask) >> 1) >= dd->n_krcv_queues) + qpn = (qpn | qpt_mask) + 2; + offset = qpn & RVT_BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { @@ -174,14 +180,14 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, * We just need to be sure we don't loop * forever. */ - } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); + } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX); /* * In order to keep the number of pages allocated to a * minimum, we scan the all existing pages before increasing * the size of the bitmap table. 
*/ if (++i > max_scan) { - if (qpt->nmaps == QPNMAP_ENTRIES) + if (qpt->nmaps == RVT_QPNMAP_ENTRIES) break; map = &qpt->map[qpt->nmaps++]; offset = 0; @@ -201,19 +207,19 @@ bail: return ret; } -static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) +static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { - struct qpn_map *map; + struct rvt_qpn_map *map; - map = qpt->map + qpn / BITS_PER_PAGE; + map = qpt->map + qpn / RVT_BITS_PER_PAGE; if (map->page) - clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); } static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) { return jhash_1word(qpn, dev->qp_rnd) & - (dev->qp_table_size - 1); + (dev->rdi.qp_dev->qp_table_size - 1); } @@ -228,18 +234,18 @@ static void insert_qp(struct qib_ibdev *dev, struct rvt_qp *qp) unsigned n = qpn_hash(dev, qp->ibqp.qp_num); atomic_inc(&qp->refcount); - spin_lock_irqsave(&dev->qpt_lock, flags); + spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); if (qp->ibqp.qp_num == 0) rcu_assign_pointer(ibp->rvp.qp[0], qp); else if (qp->ibqp.qp_num == 1) rcu_assign_pointer(ibp->rvp.qp[1], qp); else { - qp->next = dev->qp_table[n]; - rcu_assign_pointer(dev->qp_table[n], qp); + qp->next = dev->rdi.qp_dev->qp_table[n]; + rcu_assign_pointer(dev->rdi.qp_dev->qp_table[n], qp); } - spin_unlock_irqrestore(&dev->qpt_lock, flags); + spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); } /* @@ -252,34 +258,36 @@ static void remove_qp(struct qib_ibdev *dev, struct rvt_qp *qp) unsigned n = qpn_hash(dev, qp->ibqp.qp_num); unsigned long flags; int removed = 1; + spinlock_t *qpt_lock_ptr; /* Pointer to make checkpatch happy */ - spin_lock_irqsave(&dev->qpt_lock, flags); + spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); + qpt_lock_ptr = &dev->rdi.qp_dev->qpt_lock; if (rcu_dereference_protected(ibp->rvp.qp[0], - lockdep_is_held(&dev->qpt_lock)) == qp) { + lockdep_is_held(qpt_lock_ptr)) == qp) { RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); } else if (rcu_dereference_protected(ibp->rvp.qp[1], - lockdep_is_held(&dev->qpt_lock)) == qp) { + lockdep_is_held(&dev->rdi.qp_dev->qpt_lock)) == qp) { RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } else { struct rvt_qp *q; struct rvt_qp __rcu **qpp; removed = 0; - qpp = &dev->qp_table[n]; + qpp = &dev->rdi.qp_dev->qp_table[n]; for (; (q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->qpt_lock))) != NULL; + lockdep_is_held(qpt_lock_ptr))) != NULL; qpp = &q->next) if (q == qp) { RCU_INIT_POINTER(*qpp, rcu_dereference_protected(qp->next, - lockdep_is_held(&dev->qpt_lock))); + lockdep_is_held(qpt_lock_ptr))); removed = 1; break; } } - spin_unlock_irqrestore(&dev->qpt_lock, flags); + spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); if (removed) { synchronize_rcu(); atomic_dec(&qp->refcount); @@ -299,6 +307,7 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) unsigned long flags; struct rvt_qp *qp; unsigned n, qp_inuse = 0; + spinlock_t *qpt_lock_ptr; /* Pointer to make checkpatch happy */ for (n = 0; n < dd->num_pports; n++) { struct qib_ibport *ibp = &dd->pport[n].ibport_data; @@ -313,17 +322,18 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) rcu_read_unlock(); } - spin_lock_irqsave(&dev->qpt_lock, flags); - for (n = 0; n < dev->qp_table_size; n++) { - qp = rcu_dereference_protected(dev->qp_table[n], - lockdep_is_held(&dev->qpt_lock)); - RCU_INIT_POINTER(dev->qp_table[n], NULL); + spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); + qpt_lock_ptr = &dev->rdi.qp_dev->qpt_lock; + for (n = 0; n < 
dev->rdi.qp_dev->qp_table_size; n++) { + qp = rcu_dereference_protected(dev->rdi.qp_dev->qp_table[n], + lockdep_is_held(qpt_lock_ptr)); + RCU_INIT_POINTER(dev->rdi.qp_dev->qp_table[n], NULL); for (; qp; qp = rcu_dereference_protected(qp->next, - lockdep_is_held(&dev->qpt_lock))) + lockdep_is_held(qpt_lock_ptr))) qp_inuse++; } - spin_unlock_irqrestore(&dev->qpt_lock, flags); + spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); synchronize_rcu(); return qp_inuse; @@ -353,7 +363,7 @@ struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; unsigned n = qpn_hash(dev, qpn); - for (qp = rcu_dereference(dev->qp_table[n]); qp; + for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; qp = rcu_dereference(qp->next)) if (qp->ibqp.qp_num == qpn) { atomic_inc(&qp->refcount); @@ -1121,8 +1131,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, qp->s_flags = QIB_S_SIGNAL_REQ_WR; dev = to_idev(ibpd->device); dd = dd_from_dev(dev); - err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, - init_attr->port_num, gfp); + err = alloc_qpn(dd, &dev->rdi.qp_dev->qpn_table, + init_attr->qp_type, init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); @@ -1199,7 +1209,7 @@ bail_ip: kref_put(&qp->ip->ref, rvt_release_mmap_info); else vfree(qp->r_rq.wq); - free_qpn(&dev->qpn_table, qp->ibqp.qp_num); + free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); bail_qp: kfree(priv->s_hdr); kfree(priv); @@ -1250,7 +1260,7 @@ int qib_destroy_qp(struct ib_qp *ibqp) spin_unlock_irq(&qp->s_lock); /* all user's cleaned up, mark it available */ - free_qpn(&dev->qpn_table, qp->ibqp.qp_num); + free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); spin_lock(&dev->n_qps_lock); dev->n_qps_allocated--; spin_unlock(&dev->n_qps_lock); @@ -1270,19 +1280,19 @@ int qib_destroy_qp(struct ib_qp *ibqp) * qib_init_qpn_table - initialize the QP number table for a device * @qpt: the QPN table */ -void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt) +void qib_init_qpn_table(struct qib_devdata *dd, struct rvt_qpn_table *qpt) { spin_lock_init(&qpt->lock); qpt->last = 1; /* start with QPN 2 */ qpt->nmaps = 1; - qpt->mask = dd->qpn_mask; + qpt_mask = dd->qpn_mask; } /** * qib_free_qpn_table - free the QP number table for a device * @qpt: the QPN table */ -void qib_free_qpn_table(struct qib_qpn_table *qpt) +void qib_free_qpn_table(struct rvt_qpn_table *qpt) { int i; @@ -1361,11 +1371,11 @@ int qib_qp_iter_next(struct qib_qp_iter *iter) struct rvt_qp *pqp = iter->qp; struct rvt_qp *qp; - for (; n < dev->qp_table_size; n++) { + for (; n < dev->rdi.qp_dev->qp_table_size; n++) { if (pqp) qp = rcu_dereference(pqp->next); else - qp = rcu_dereference(dev->qp_table[n]); + qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); pqp = qp; if (qp) { iter->qp = qp; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 74cb501..2daca8f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1946,24 +1946,29 @@ int qib_register_ib_device(struct qib_devdata *dd) unsigned i, ctxt; int ret; - dev->qp_table_size = ib_qib_qp_table_size; + /* allocate parent object */ + dev->rdi.qp_dev = kzalloc(sizeof(*dev->rdi.qp_dev), GFP_KERNEL); + if (!dev->rdi.qp_dev) + return -ENOMEM; + dev->rdi.qp_dev->qp_table_size = ib_qib_qp_table_size; + dev->rdi.qp_dev->qp_table_bits = ilog2(ib_qib_qp_table_size); get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); - dev->qp_table 
= kmalloc_array( - dev->qp_table_size, - sizeof(*dev->qp_table), + dev->rdi.qp_dev->qp_table = kmalloc_array( + dev->rdi.qp_dev->qp_table_size, + sizeof(*dev->rdi.qp_dev->qp_table), GFP_KERNEL); - if (!dev->qp_table) { + if (!dev->rdi.qp_dev->qp_table) { ret = -ENOMEM; goto err_qpt; } - for (i = 0; i < dev->qp_table_size; i++) - RCU_INIT_POINTER(dev->qp_table[i], NULL); + for (i = 0; i < dev->rdi.qp_dev->qp_table_size; i++) + RCU_INIT_POINTER(dev->rdi.qp_dev->qp_table[i], NULL); for (i = 0; i < dd->num_pports; i++) init_ibport(ppd + i); /* Only need to initialize non-zero fields. */ - spin_lock_init(&dev->qpt_lock); + spin_lock_init(&dev->rdi.qp_dev->qpt_lock); spin_lock_init(&dev->n_cqs_lock); spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); @@ -1972,7 +1977,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; - qib_init_qpn_table(dd, &dev->qpn_table); + qib_init_qpn_table(dd, &dev->rdi.qp_dev->qpn_table); INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); @@ -2159,7 +2164,7 @@ err_tx: sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); err_hdrs: - kfree(dev->qp_table); + kfree(dev->rdi.qp_dev->qp_table); err_qpt: qib_dev_err(dd, "cannot register verbs: %d!\n", -ret); bail: @@ -2192,7 +2197,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) qps_inuse); del_timer_sync(&dev->mem_timer); - qib_free_qpn_table(&dev->qpn_table); + qib_free_qpn_table(&dev->rdi.qp_dev->qpn_table); while (!list_empty(&dev->txreq_free)) { struct list_head *l = dev->txreq_free.next; struct qib_verbs_txreq *tx; @@ -2206,7 +2211,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) dd->pport->sdma_descq_cnt * sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); - kfree(dev->qp_table); + kfree(dev->rdi.qp_dev->qp_table); } /* diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index eade668..e10ab80 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -55,9 +55,6 @@ struct qib_verbs_txreq; #define QIB_MAX_RDMA_ATOMIC 16 #define QIB_GUIDS_PER_PORT 5 -#define QPN_MAX (1 << 24) -#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) - /* * Increment this value if any changes that break userspace ABI * compatibility are made. @@ -364,26 +361,6 @@ static inline struct rvt_rwqe *get_rwqe_ptr(struct rvt_rq *rq, unsigned n) rq->max_sge * sizeof(struct ib_sge)) * n); } -/* - * QPN-map pages start out as NULL, they get allocated upon - * first use and are never deallocated. This way, - * large bitmaps are not allocated unless large numbers of QPs are used. 
- */ -struct qpn_map { - void *page; -}; - -struct qib_qpn_table { - spinlock_t lock; /* protect changes in this struct */ - unsigned flags; /* flags for QP0/1 allocated for each port */ - u32 last; /* last QP number allocated */ - u32 nmaps; /* size of the map table */ - u16 limit; - u16 mask; - /* bit map of free QP numbers other than 0/1 */ - struct qpn_map map[QPNMAP_ENTRIES]; -}; - struct qib_opcode_stats { u64 n_packets; /* number of packets */ u64 n_bytes; /* total number of bytes */ @@ -429,21 +406,15 @@ struct qib_ibport { struct qib_ibdev { struct rvt_dev_info rdi; - /* QP numbers are shared by all IB ports */ - struct qib_qpn_table qpn_table; struct list_head piowait; /* list for wait PIO buf */ struct list_head dmawait; /* list for wait DMA */ struct list_head txwait; /* list for wait qib_verbs_txreq */ struct list_head memwait; /* list for wait kernel memory */ struct list_head txreq_free; struct timer_list mem_timer; - struct rvt_qp __rcu **qp_table; struct qib_pio_header *pio_hdrs; dma_addr_t pio_hdrs_phys; - /* list of QPs waiting for RNR timer */ - u32 qp_table_size; /* size of the hash table */ u32 qp_rnd; /* random bytes for hash */ - spinlock_t qpt_lock; u32 n_piowait; u32 n_txwait; @@ -581,9 +552,9 @@ int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, unsigned qib_free_all_qps(struct qib_devdata *dd); -void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt); +void qib_init_qpn_table(struct qib_devdata *dd, struct rvt_qpn_table *qpt); -void qib_free_qpn_table(struct qib_qpn_table *qpt); +void qib_free_qpn_table(struct rvt_qpn_table *qpt); #ifdef CONFIG_DEBUG_FS -- cgit v0.10.2 From 1da0f7e2041ec54657eea25ed84612819c7a2cba Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:33 -0800 Subject: IB/qib: Delete QIB user context allocation and de-alloction functions IB user context alloc and dealloc functions have been added to rdmavt. Delete the QIB user context alloc/dealloc functions and use the ones in rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 2daca8f..5d230c8 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -133,16 +133,6 @@ const int ib_qib_state_ops[IB_QPS_ERR + 1] = { QIB_POST_SEND_OK | QIB_FLUSH_SEND, }; -struct qib_ucontext { - struct ib_ucontext ibucontext; -}; - -static inline struct qib_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct qib_ucontext, ibucontext); -} - /* * Translate ib_wr_opcode into ib_wc_opcode. 
*/ @@ -1841,36 +1831,6 @@ unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index) return ret; } -/** - * qib_alloc_ucontext - allocate a ucontest - * @ibdev: the infiniband device - * @udata: not used by the QLogic_IB driver - */ - -static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) -{ - struct qib_ucontext *context; - struct ib_ucontext *ret; - - context = kmalloc(sizeof(*context), GFP_KERNEL); - if (!context) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - ret = &context->ibucontext; - -bail: - return ret; -} - -static int qib_dealloc_ucontext(struct ib_ucontext *context) -{ - kfree(to_iucontext(context)); - return 0; -} - static void init_ibport(struct qib_pportdata *ppd) { struct qib_verbs_counters cntrs; @@ -2062,8 +2022,8 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->modify_port = qib_modify_port; ibdev->query_pkey = NULL; ibdev->query_gid = qib_query_gid; - ibdev->alloc_ucontext = qib_alloc_ucontext; - ibdev->dealloc_ucontext = qib_dealloc_ucontext; + ibdev->alloc_ucontext = NULL; + ibdev->dealloc_ucontext = NULL; ibdev->alloc_pd = NULL; ibdev->dealloc_pd = NULL; ibdev->create_ah = NULL; -- cgit v0.10.2 From 0aeddea29c78299e4b022aa0bb978ed87e33f678 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:40 -0800 Subject: IB/qib: Remove qib_query_device function Removed qib_query_device function to use rdmavt rvt_query_device function The device attributes still need to be filled in by the driver. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 5d230c8..45f9582 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1576,55 +1576,6 @@ full: } } -static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - struct qib_devdata *dd = dd_from_ibdev(ibdev); - struct qib_ibdev *dev = to_idev(ibdev); - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - memset(props, 0, sizeof(*props)); - - props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | - IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | - IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; - props->page_size_cap = PAGE_SIZE; - props->vendor_id = - QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3; - props->vendor_part_id = dd->deviceid; - props->hw_ver = dd->minrev; - props->sys_image_guid = ib_qib_sys_image_guid; - props->max_mr_size = ~0ULL; - props->max_qp = ib_qib_max_qps; - props->max_qp_wr = ib_qib_max_qp_wrs; - props->max_sge = ib_qib_max_sges; - props->max_sge_rd = ib_qib_max_sges; - props->max_cq = ib_qib_max_cqs; - props->max_ah = ib_qib_max_ahs; - props->max_cqe = ib_qib_max_cqes; - props->max_mr = dev->rdi.lkey_table.max; - props->max_fmr = dev->rdi.lkey_table.max; - props->max_map_per_fmr = 32767; - props->max_pd = dev->rdi.dparms.props.max_pd; - props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; - props->max_qp_init_rd_atom = 255; - /* props->max_res_rd_atom */ - props->max_srq = ib_qib_max_srqs; - props->max_srq_wr = ib_qib_max_srq_wrs; - props->max_srq_sge = ib_qib_max_srq_sges; - /* props->local_ca_ack_delay */ - props->atomic_cap = IB_ATOMIC_GLOB; - props->max_pkeys = qib_get_npkeys(dd); - props->max_mcast_grp = ib_qib_max_mcast_grps; - props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached; - props->max_total_mcast_qp_attach = 
props->max_mcast_qp_attach * - props->max_mcast_grp; - - return 0; -} - static int qib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { @@ -1894,6 +1845,53 @@ static int qib_port_immutable(struct ib_device *ibdev, u8 port_num, } /** + * qib_fill_device_attr - Fill in rvt dev info device attributes. + * @dd: the device data structure + */ +static void qib_fill_device_attr(struct qib_devdata *dd) +{ + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + + memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); + + rdi->dparms.props.max_pd = ib_qib_max_pds; + rdi->dparms.props.max_ah = ib_qib_max_ahs; + rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | + IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; + rdi->dparms.props.page_size_cap = PAGE_SIZE; + rdi->dparms.props.vendor_id = + QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3; + rdi->dparms.props.vendor_part_id = dd->deviceid; + rdi->dparms.props.hw_ver = dd->minrev; + rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid; + rdi->dparms.props.max_mr_size = ~0ULL; + rdi->dparms.props.max_qp = ib_qib_max_qps; + rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs; + rdi->dparms.props.max_sge = ib_qib_max_sges; + rdi->dparms.props.max_sge_rd = ib_qib_max_sges; + rdi->dparms.props.max_cq = ib_qib_max_cqs; + rdi->dparms.props.max_cqe = ib_qib_max_cqes; + rdi->dparms.props.max_ah = ib_qib_max_ahs; + rdi->dparms.props.max_mr = rdi->lkey_table.max; + rdi->dparms.props.max_fmr = rdi->lkey_table.max; + rdi->dparms.props.max_map_per_fmr = 32767; + rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; + rdi->dparms.props.max_qp_init_rd_atom = 255; + rdi->dparms.props.max_srq = ib_qib_max_srqs; + rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs; + rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges; + rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB; + rdi->dparms.props.max_pkeys = qib_get_npkeys(dd); + rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps; + rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached; + rdi->dparms.props.max_total_mcast_qp_attach = + rdi->dparms.props.max_mcast_qp_attach * + rdi->dparms.props.max_mcast_grp; +} + +/** * qib_register_ib_device - register our device with the infiniband core * @dd: the device data structure * Return the allocated qib_ibdev pointer or NULL on error. 
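As a point of reference, the ordering this hunk sets up is: the driver fills rdi->dparms.props through qib_fill_device_attr(), then sets the remaining per-driver dparms fields, and only then registers the device with the core. A minimal sketch of that pattern, assuming an rvt_register_device() entry point (the registration call itself is not part of this patch; qib still registers through ib_register_device() at this point in the series):

	/* Sketch only: illustrates the fill-then-register ordering.
	 * rvt_register_device() is an assumption, not code added by this patch.
	 */
	static int example_verbs_register(struct qib_devdata *dd)
	{
		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;

		qib_fill_device_attr(dd);		/* driver-owned device attributes */
		rdi->dparms.nports = dd->num_pports;
		rdi->dparms.npkeys = qib_get_npkeys(dd);

		return rvt_register_device(rdi);	/* assumed rdmavt entry point */
	}
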
@@ -2016,7 +2014,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->phys_port_cnt = dd->num_pports; ibdev->num_comp_vectors = 1; ibdev->dma_device = &dd->pcidev->dev; - ibdev->query_device = qib_query_device; + ibdev->query_device = NULL; ibdev->modify_device = qib_modify_device; ibdev->query_port = qib_query_port; ibdev->modify_port = qib_modify_port; @@ -2073,14 +2071,14 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; - dd->verbs_dev.rdi.dparms.props.max_pd = ib_qib_max_pds; - dd->verbs_dev.rdi.dparms.props.max_ah = ib_qib_max_ahs; dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | RVT_FLAG_CQ_INIT_DRIVER); dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd); + qib_fill_device_attr(dd); + ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) { ctxt = ppd->hw_pidx; -- cgit v0.10.2 From 01ba79d4dddcf4ca3669f8dc4658322342793fee Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:46 -0800 Subject: IB/qib: Use rdmavt send and receive flags Use the definitions of the s_flags and r_flags which are now in rdmavt. Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index e8b239c..ad41df3 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -414,7 +414,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, */ if (list_empty(&qp->rspwait)) { qp->r_flags |= - QIB_R_RSP_NAK; + RVT_R_RSP_NAK; atomic_inc( &qp->refcount); list_add_tail( @@ -583,14 +583,14 @@ move_along: */ list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) { list_del_init(&qp->rspwait); - if (qp->r_flags & QIB_R_RSP_NAK) { - qp->r_flags &= ~QIB_R_RSP_NAK; + if (qp->r_flags & RVT_R_RSP_NAK) { + qp->r_flags &= ~RVT_R_RSP_NAK; qib_send_rc_ack(qp); } - if (qp->r_flags & QIB_R_RSP_SEND) { + if (qp->r_flags & RVT_R_RSP_SEND) { unsigned long flags; - qp->r_flags &= ~QIB_R_RSP_SEND; + qp->r_flags &= ~RVT_R_RSP_SEND; spin_lock_irqsave(&qp->s_lock, flags); if (ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND) diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index f18ee76..c5e9cf5 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -386,7 +386,7 @@ static void qib_reset_qp(struct rvt_qp *qp, enum ib_qp_type type) qp->qkey = 0; qp->qp_access_flags = 0; atomic_set(&priv->s_dma_busy, 0); - qp->s_flags &= QIB_S_SIGNAL_REQ_WR; + qp->s_flags &= RVT_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; qp->s_draining = 0; @@ -431,7 +431,7 @@ static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; - if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) qib_put_ss(&qp->s_rdma_read_sge); qib_put_ss(&qp->r_sge); @@ -496,22 +496,22 @@ int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err) qp->state = IB_QPS_ERR; - if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { - qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= 
~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); } - if (qp->s_flags & QIB_S_ANY_WAIT_SEND) - qp->s_flags &= ~QIB_S_ANY_WAIT_SEND; + if (qp->s_flags & RVT_S_ANY_WAIT_SEND) + qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; spin_lock(&dev->rdi.pending_lock); - if (!list_empty(&priv->iowait) && !(qp->s_flags & QIB_S_BUSY)) { - qp->s_flags &= ~QIB_S_ANY_WAIT_IO; + if (!list_empty(&priv->iowait) && !(qp->s_flags & RVT_S_BUSY)) { + qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->iowait); } spin_unlock(&dev->rdi.pending_lock); - if (!(qp->s_flags & QIB_S_BUSY)) { + if (!(qp->s_flags & RVT_S_BUSY)) { qp->s_hdrwords = 0; if (qp->s_rdma_mr) { rvt_put_mr(qp->s_rdma_mr); @@ -533,7 +533,7 @@ int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err) wc.qp = &qp->ibqp; wc.opcode = IB_WC_RECV; - if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) { + if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; wc.status = err; qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); @@ -716,7 +716,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (!list_empty(&priv->iowait)) list_del_init(&priv->iowait); spin_unlock(&dev->rdi.pending_lock); - qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); /* Stop the sending work queue and retry timer */ @@ -739,7 +739,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_RTR: /* Allow event to retrigger if QP set to RTR more than once */ - qp->r_flags &= ~QIB_R_COMM_EST; + qp->r_flags &= ~RVT_R_COMM_EST; qp->state = new_state; break; @@ -910,7 +910,7 @@ int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - if (qp->s_flags & QIB_S_SIGNAL_REQ_WR) + if (qp->s_flags & RVT_S_SIGNAL_REQ_WR) init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; else init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; @@ -1128,7 +1128,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = QIB_S_SIGNAL_REQ_WR; + qp->s_flags = RVT_S_SIGNAL_REQ_WR; dev = to_idev(ibpd->device); dd = dd_from_dev(dev); err = alloc_qpn(dd, &dev->rdi.qp_dev->qpn_table, @@ -1244,7 +1244,7 @@ int qib_destroy_qp(struct ib_qp *ibqp) if (!list_empty(&priv->iowait)) list_del_init(&priv->iowait); spin_unlock(&dev->rdi.pending_lock); - qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); spin_unlock_irq(&qp->s_lock); cancel_work_sync(&priv->s_work); del_timer_sync(&qp->s_timer); @@ -1318,20 +1318,20 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth) * honor the credit field. 
*/ if (credit == QIB_AETH_CREDIT_INVAL) { - if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) { - qp->s_flags |= QIB_S_UNLIMITED_CREDIT; - if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) { - qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT; + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) { + qp->s_flags |= RVT_S_UNLIMITED_CREDIT; + if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT; qib_schedule_send(qp); } } - } else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) { + } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) { /* Compute new LSN (i.e., MSN + credit) */ credit = (aeth + credit_table[credit]) & QIB_MSN_MASK; if (qib_cmp24(credit, qp->s_lsn) > 0) { qp->s_lsn = credit; - if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) { - qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT; + if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT; qib_schedule_send(qp); } } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 1e8463d..e118004 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -56,7 +56,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, static void start_timer(struct rvt_qp *qp) { - qp->s_flags |= QIB_S_TIMER; + qp->s_flags |= RVT_S_TIMER; qp->s_timer.function = rc_timeout; /* 4.096 usec. * (1 << qp->timeout) */ qp->s_timer.expires = jiffies + qp->timeout_jiffies; @@ -112,7 +112,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, case OP(ACKNOWLEDGE): /* Check for no next entry in the queue. */ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { - if (qp->s_flags & QIB_S_ACK_PENDING) + if (qp->s_flags & RVT_S_ACK_PENDING) goto normal; goto bail; } @@ -196,7 +196,7 @@ normal: * (see above). */ qp->s_ack_state = OP(SEND_ONLY); - qp->s_flags &= ~QIB_S_ACK_PENDING; + qp->s_flags &= ~RVT_S_ACK_PENDING; qp->s_cur_sge = NULL; if (qp->s_nak_state) ohdr->u.aeth = @@ -218,7 +218,7 @@ normal: bail: qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING); + qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING); return 0; } @@ -256,7 +256,7 @@ int qib_make_rc_req(struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); /* Sending responses has higher priority over sending requests. */ - if ((qp->s_flags & QIB_S_RESP_PENDING) && + if ((qp->s_flags & RVT_S_RESP_PENDING) && qib_make_rc_ack(dev, qp, ohdr, pmtu)) goto done; @@ -268,7 +268,7 @@ int qib_make_rc_req(struct rvt_qp *qp) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { - qp->s_flags |= QIB_S_WAIT_DMA; + qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } wqe = get_swqe_ptr(qp, qp->s_last); @@ -278,12 +278,12 @@ int qib_make_rc_req(struct rvt_qp *qp) goto done; } - if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK)) + if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK)) goto bail; if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) { if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) { - qp->s_flags |= QIB_S_WAIT_PSN; + qp->s_flags |= RVT_S_WAIT_PSN; goto bail; } qp->s_sending_psn = qp->s_psn; @@ -318,7 +318,7 @@ int qib_make_rc_req(struct rvt_qp *qp) */ if ((wqe->wr.send_flags & IB_SEND_FENCE) && qp->s_num_rd_atomic) { - qp->s_flags |= QIB_S_WAIT_FENCE; + qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } wqe->psn = qp->s_next_psn; @@ -336,9 +336,9 @@ int qib_make_rc_req(struct rvt_qp *qp) case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: /* If no credit, return. 
*/ - if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) && + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; + qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } wqe->lpsn = wqe->psn; @@ -364,14 +364,14 @@ int qib_make_rc_req(struct rvt_qp *qp) break; case IB_WR_RDMA_WRITE: - if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; /* FALLTHROUGH */ case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) && + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; + qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } @@ -412,11 +412,11 @@ int qib_make_rc_req(struct rvt_qp *qp) if (newreq) { if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) { - qp->s_flags |= QIB_S_WAIT_RDMAR; + qp->s_flags |= RVT_S_WAIT_RDMAR; goto bail; } qp->s_num_rd_atomic++; - if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; /* * Adjust s_next_psn to count the @@ -450,11 +450,11 @@ int qib_make_rc_req(struct rvt_qp *qp) if (newreq) { if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) { - qp->s_flags |= QIB_S_WAIT_RDMAR; + qp->s_flags |= RVT_S_WAIT_RDMAR; goto bail; } qp->s_num_rd_atomic++; - if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; wqe->lpsn = wqe->psn; } @@ -619,9 +619,9 @@ int qib_make_rc_req(struct rvt_qp *qp) delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8; if (delta && delta % QIB_PSN_CREDIT == 0) bth2 |= IB_BTH_REQ_ACK; - if (qp->s_flags & QIB_S_SEND_ONE) { - qp->s_flags &= ~QIB_S_SEND_ONE; - qp->s_flags |= QIB_S_WAIT_ACK; + if (qp->s_flags & RVT_S_SEND_ONE) { + qp->s_flags &= ~RVT_S_SEND_ONE; + qp->s_flags |= RVT_S_WAIT_ACK; bth2 |= IB_BTH_REQ_ACK; } qp->s_len -= len; @@ -634,7 +634,7 @@ done: goto unlock; bail: - qp->s_flags &= ~QIB_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; @@ -670,7 +670,7 @@ void qib_send_rc_ack(struct rvt_qp *qp) goto unlock; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ - if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt) + if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt) goto queue_ack; /* Construct the header with s_lock held so APM doesn't change it. */ @@ -761,7 +761,7 @@ void qib_send_rc_ack(struct rvt_qp *qp) queue_ack: if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { this_cpu_inc(*ibp->rvp.rc_qacks); - qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING; + qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; @@ -855,13 +855,13 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) done: qp->s_psn = psn; /* - * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer + * Set RVT_S_WAIT_PSN as qib_rc_complete() may start the timer * asynchronously before the send tasklet can get scheduled. * Doing it in qib_make_rc_req() is too late. 
*/ if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) && (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) - qp->s_flags |= QIB_S_WAIT_PSN; + qp->s_flags |= RVT_S_WAIT_PSN; } /* @@ -892,11 +892,11 @@ static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait) else ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; - qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | - QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN | - QIB_S_WAIT_ACK); + qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | + RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN | + RVT_S_WAIT_ACK); if (wait) - qp->s_flags |= QIB_S_SEND_ONE; + qp->s_flags |= RVT_S_SEND_ONE; reset_psn(qp, psn); } @@ -911,10 +911,10 @@ static void rc_timeout(unsigned long arg) spin_lock_irqsave(&qp->r_lock, flags); spin_lock(&qp->s_lock); - if (qp->s_flags & QIB_S_TIMER) { + if (qp->s_flags & RVT_S_TIMER) { ibp = to_iport(qp->ibqp.device, qp->port_num); ibp->rvp.n_rc_timeouts++; - qp->s_flags &= ~QIB_S_TIMER; + qp->s_flags &= ~RVT_S_TIMER; del_timer(&qp->s_timer); qib_restart_rc(qp, qp->s_last_psn + 1, 1); qib_schedule_send(qp); @@ -932,8 +932,8 @@ void qib_rc_rnr_retry(unsigned long arg) unsigned long flags; spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_flags & QIB_S_WAIT_RNR) { - qp->s_flags &= ~QIB_S_WAIT_RNR; + if (qp->s_flags & RVT_S_WAIT_RNR) { + qp->s_flags &= ~RVT_S_WAIT_RNR; del_timer(&qp->s_timer); qib_schedule_send(qp); } @@ -1003,7 +1003,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) * there are still requests that haven't been acked. */ if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && - !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) && + !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) && (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) start_timer(qp); @@ -1018,7 +1018,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) rvt_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr.wr_id; @@ -1035,9 +1035,9 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) * If we were waiting for sends to complete before resending, * and they are now complete, restart sending. */ - if (qp->s_flags & QIB_S_WAIT_PSN && + if (qp->s_flags & RVT_S_WAIT_PSN && qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { - qp->s_flags &= ~QIB_S_WAIT_PSN; + qp->s_flags &= ~RVT_S_WAIT_PSN; qp->s_sending_psn = qp->s_psn; qp->s_sending_hpsn = qp->s_psn - 1; qib_schedule_send(qp); @@ -1074,7 +1074,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, rvt_put_mr(sge->mr); } /* Post a send completion queue entry if requested. 
*/ - if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr.wr_id; @@ -1138,8 +1138,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, int diff; /* Remove QP from retry timer */ - if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { - qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); } @@ -1187,11 +1187,11 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { /* Retry this request. */ - if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) { - qp->r_flags |= QIB_R_RDMAR_SEQ; + if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) { + qp->r_flags |= RVT_R_RDMAR_SEQ; qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { - qp->r_flags |= QIB_R_RSP_SEND; + qp->r_flags |= RVT_R_RSP_SEND; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); @@ -1214,14 +1214,14 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { qp->s_num_rd_atomic--; /* Restart sending task if fence is complete */ - if ((qp->s_flags & QIB_S_WAIT_FENCE) && + if ((qp->s_flags & RVT_S_WAIT_FENCE) && !qp->s_num_rd_atomic) { - qp->s_flags &= ~(QIB_S_WAIT_FENCE | - QIB_S_WAIT_ACK); + qp->s_flags &= ~(RVT_S_WAIT_FENCE | + RVT_S_WAIT_ACK); qib_schedule_send(qp); - } else if (qp->s_flags & QIB_S_WAIT_RDMAR) { - qp->s_flags &= ~(QIB_S_WAIT_RDMAR | - QIB_S_WAIT_ACK); + } else if (qp->s_flags & RVT_S_WAIT_RDMAR) { + qp->s_flags &= ~(RVT_S_WAIT_RDMAR | + RVT_S_WAIT_ACK); qib_schedule_send(qp); } } @@ -1249,8 +1249,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, qp->s_state = OP(SEND_LAST); qp->s_psn = psn + 1; } - if (qp->s_flags & QIB_S_WAIT_ACK) { - qp->s_flags &= ~QIB_S_WAIT_ACK; + if (qp->s_flags & RVT_S_WAIT_ACK) { + qp->s_flags &= ~RVT_S_WAIT_ACK; qib_schedule_send(qp); } qib_get_credit(qp, aeth); @@ -1264,7 +1264,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, ibp->rvp.n_rnr_naks++; if (qp->s_acked == qp->s_tail) goto bail; - if (qp->s_flags & QIB_S_WAIT_RNR) + if (qp->s_flags & RVT_S_WAIT_RNR) goto bail; if (qp->s_rnr_retry == 0) { status = IB_WC_RNR_RETRY_EXC_ERR; @@ -1280,8 +1280,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, reset_psn(qp, psn); - qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK); - qp->s_flags |= QIB_S_WAIT_RNR; + qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK); + qp->s_flags |= RVT_S_WAIT_RNR; qp->s_timer.function = qib_rc_rnr_retry; qp->s_timer.expires = jiffies + usecs_to_jiffies( ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) & @@ -1356,8 +1356,8 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, struct rvt_swqe *wqe; /* Remove QP from retry timer */ - if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { - qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); } @@ -1372,10 +1372,10 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, } ibp->rvp.n_rdma_seq++; - qp->r_flags |= QIB_R_RDMAR_SEQ; + qp->r_flags |= RVT_R_RDMAR_SEQ; qib_restart_rc(qp, qp->s_last_psn + 1, 
0); if (list_empty(&qp->rspwait)) { - qp->r_flags |= QIB_R_RSP_SEND; + qp->r_flags |= RVT_R_RSP_SEND; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1426,7 +1426,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, * If send tasklet not running attempt to progress * SDMA queue. */ - if (!(qp->s_flags & QIB_S_BUSY)) { + if (!(qp->s_flags & RVT_S_BUSY)) { /* Acquire SDMA Lock */ spin_lock_irqsave(&ppd->sdma_lock, flags); /* Invoke sdma make progress */ @@ -1461,10 +1461,10 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, * Skip everything other than the PSN we expect, if we are waiting * for a reply to a restarted RDMA read or atomic op. */ - if (qp->r_flags & QIB_R_RDMAR_SEQ) { + if (qp->r_flags & RVT_R_RDMAR_SEQ) { if (qib_cmp24(psn, qp->s_last_psn + 1) != 0) goto ack_done; - qp->r_flags &= ~QIB_R_RDMAR_SEQ; + qp->r_flags &= ~RVT_R_RDMAR_SEQ; } if (unlikely(qp->s_acked == qp->s_tail)) @@ -1516,10 +1516,10 @@ read_middle: * We got a response so update the timeout. * 4.096 usec. * (1 << qp->timeout) */ - qp->s_flags |= QIB_S_TIMER; + qp->s_flags |= RVT_S_TIMER; mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); - if (qp->s_flags & QIB_S_WAIT_ACK) { - qp->s_flags &= ~QIB_S_WAIT_ACK; + if (qp->s_flags & RVT_S_WAIT_ACK) { + qp->s_flags &= ~RVT_S_WAIT_ACK; qib_schedule_send(qp); } @@ -1653,7 +1653,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, * Otherwise, we end up propagating congestion. */ if (list_empty(&qp->rspwait)) { - qp->r_flags |= QIB_R_RSP_NAK; + qp->r_flags |= RVT_R_RSP_NAK; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1792,7 +1792,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, * which doesn't accept a RDMA read response or atomic * response as an ACK for earlier SENDs or RDMA writes. */ - if (!(qp->s_flags & QIB_S_RESP_PENDING)) { + if (!(qp->s_flags & RVT_S_RESP_PENDING)) { spin_unlock_irqrestore(&qp->s_lock, flags); qp->r_nak_state = 0; qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; @@ -1806,7 +1806,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, break; } qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_flags |= QIB_S_RESP_PENDING; + qp->s_flags |= RVT_S_RESP_PENDING; qp->r_nak_state = 0; qib_schedule_send(qp); @@ -1949,8 +1949,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, break; } - if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { - qp->r_flags |= QIB_R_COMM_EST; + if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) { + qp->r_flags |= RVT_R_COMM_EST; if (qp->ibqp.event_handler) { struct ib_event ev; @@ -2029,7 +2029,7 @@ send_last: qib_copy_sge(&qp->r_sge, data, tlen, 1); qib_put_ss(&qp->r_sge); qp->r_msn++; - if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -2158,7 +2158,7 @@ send_last: qp->r_head_ack_queue = next; /* Schedule the send tasklet. */ - qp->s_flags |= QIB_S_RESP_PENDING; + qp->s_flags |= RVT_S_RESP_PENDING; qib_schedule_send(qp); goto sunlock; @@ -2222,7 +2222,7 @@ send_last: qp->r_head_ack_queue = next; /* Schedule the send tasklet. 
*/ - qp->s_flags |= QIB_S_RESP_PENDING; + qp->s_flags |= RVT_S_RESP_PENDING; qib_schedule_send(qp); goto sunlock; @@ -2246,7 +2246,7 @@ rnr_nak: qp->r_ack_psn = qp->r_psn; /* Queue RNR NAK for later */ if (list_empty(&qp->rspwait)) { - qp->r_flags |= QIB_R_RSP_NAK; + qp->r_flags |= RVT_R_RSP_NAK; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -2258,7 +2258,7 @@ nack_op_err: qp->r_ack_psn = qp->r_psn; /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { - qp->r_flags |= QIB_R_RSP_NAK; + qp->r_flags |= RVT_R_RSP_NAK; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -2272,7 +2272,7 @@ nack_inv: qp->r_ack_psn = qp->r_psn; /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { - qp->r_flags |= QIB_R_RSP_NAK; + qp->r_flags |= RVT_R_RSP_NAK; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 6290979..f7b3bb7 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -190,7 +190,7 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only) qp->r_wr_id = wqe->wr_id; ret = 1; - set_bit(QIB_R_WRID_VALID, &qp->r_aflags); + set_bit(RVT_R_WRID_VALID, &qp->r_aflags); if (handler) { u32 n; @@ -378,11 +378,11 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) spin_lock_irqsave(&sqp->s_lock, flags); /* Return if we are already busy processing a work request. */ - if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) || + if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND)) goto unlock; - sqp->s_flags |= QIB_S_BUSY; + sqp->s_flags |= RVT_S_BUSY; again: if (sqp->s_last == sqp->s_head) @@ -547,7 +547,7 @@ again: if (release) qib_put_ss(&qp->r_sge); - if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) @@ -592,7 +592,7 @@ rnr_nak: spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK)) goto clr_busy; - sqp->s_flags |= QIB_S_WAIT_RNR; + sqp->s_flags |= RVT_S_WAIT_RNR; sqp->s_timer.function = qib_rc_rnr_retry; sqp->s_timer.expires = jiffies + usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]); @@ -622,7 +622,7 @@ serr: if (sqp->ibqp.qp_type == IB_QPT_RC) { int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR); - sqp->s_flags &= ~QIB_S_BUSY; + sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); if (lastwqe) { struct ib_event ev; @@ -635,7 +635,7 @@ serr: goto done; } clr_busy: - sqp->s_flags &= ~QIB_S_BUSY; + sqp->s_flags &= ~RVT_S_BUSY; unlock: spin_unlock_irqrestore(&sqp->s_lock, flags); done: @@ -751,7 +751,7 @@ void qib_do_send(struct work_struct *work) return; } - qp->s_flags |= QIB_S_BUSY; + qp->s_flags |= RVT_S_BUSY; spin_unlock_irqrestore(&qp->s_lock, flags); @@ -794,7 +794,7 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); /* See ch. 
11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED) || status != IB_WC_SUCCESS) { struct ib_wc wc; diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index bb34bb9..3819a6d 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -703,11 +703,11 @@ busy: ibp = &ppd->ibport_data; ibp->rvp.n_dmawait++; - qp->s_flags |= QIB_S_WAIT_DMA_DESC; + qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->iowait, &dev->dmawait); } spin_unlock(&dev->rdi.pending_lock); - qp->s_flags &= ~QIB_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; spin_unlock(&qp->s_lock); ret = -EBUSY; } else { diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 659ac51..deceb45 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -65,7 +65,7 @@ int qib_make_uc_req(struct rvt_qp *qp) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { - qp->s_flags |= QIB_S_WAIT_DMA; + qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } wqe = get_swqe_ptr(qp, qp->s_last); @@ -221,7 +221,7 @@ done: goto unlock; bail: - qp->s_flags &= ~QIB_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; @@ -279,7 +279,7 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, inv: if (qp->r_state == OP(SEND_FIRST) || qp->r_state == OP(SEND_MIDDLE)) { - set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); + set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else qib_put_ss(&qp->r_sge); @@ -329,8 +329,8 @@ inv: goto inv; } - if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { - qp->r_flags |= QIB_R_COMM_EST; + if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) { + qp->r_flags |= RVT_R_COMM_EST; if (qp->ibqp.event_handler) { struct ib_event ev; @@ -347,7 +347,7 @@ inv: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): send_first: - if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) qp->r_sge = qp->s_rdma_read_sge; else { ret = qib_get_rwqe(qp, 0); @@ -484,7 +484,7 @@ rdma_last_imm: tlen -= (hdrsize + pad + 4); if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; - if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) qib_put_ss(&qp->s_rdma_read_sge); else { ret = qib_get_rwqe(qp, 1); @@ -524,7 +524,7 @@ rdma_last: return; rewind: - set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); + set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; drop: ibp->rvp.n_pkt_drops++; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index d84872d..76f854e 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -141,8 +141,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) /* * Get the next work request entry to find where to put the data. */ - if (qp->r_flags & QIB_R_REUSE_SGE) - qp->r_flags &= ~QIB_R_REUSE_SGE; + if (qp->r_flags & RVT_R_REUSE_SGE) + qp->r_flags &= ~RVT_R_REUSE_SGE; else { int ret; @@ -159,7 +159,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } /* Silently drop packets which are too big. 
*/ if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_flags |= QIB_R_REUSE_SGE; + qp->r_flags |= RVT_R_REUSE_SGE; ibp->rvp.n_pkt_drops++; goto bail_unlock; } @@ -203,7 +203,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) length -= len; } qib_put_ss(&qp->r_sge); - if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -260,7 +260,7 @@ int qib_make_ud_req(struct rvt_qp *qp) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { - qp->s_flags |= QIB_S_WAIT_DMA; + qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } wqe = get_swqe_ptr(qp, qp->s_last); @@ -297,7 +297,7 @@ int qib_make_ud_req(struct rvt_qp *qp) * zero length descriptor so we get a callback. */ if (atomic_read(&priv->s_dma_busy)) { - qp->s_flags |= QIB_S_WAIT_DMA; + qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } qp->s_cur = next_cur; @@ -389,7 +389,7 @@ done: goto unlock; bail: - qp->s_flags &= ~QIB_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; @@ -534,8 +534,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, /* * Get the next work request entry to find where to put the data. */ - if (qp->r_flags & QIB_R_REUSE_SGE) - qp->r_flags &= ~QIB_R_REUSE_SGE; + if (qp->r_flags & RVT_R_REUSE_SGE) + qp->r_flags &= ~RVT_R_REUSE_SGE; else { int ret; @@ -552,7 +552,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } /* Silently drop packets which are too big. */ if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_flags |= QIB_R_REUSE_SGE; + qp->r_flags |= RVT_R_REUSE_SGE; goto drop; } if (has_grh) { @@ -563,7 +563,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); qib_put_ss(&qp->r_sge); - if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 45f9582..c190534 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -734,8 +734,8 @@ static void mem_timer(unsigned long data) if (qp) { spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_flags & QIB_S_WAIT_KMEM) { - qp->s_flags &= ~QIB_S_WAIT_KMEM; + if (qp->s_flags & RVT_S_WAIT_KMEM) { + qp->s_flags &= ~RVT_S_WAIT_KMEM; qib_schedule_send(qp); } spin_unlock_irqrestore(&qp->s_lock, flags); @@ -958,10 +958,10 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK && list_empty(&priv->iowait)) { dev->n_txwait++; - qp->s_flags |= QIB_S_WAIT_TX; + qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->iowait, &dev->txwait); } - qp->s_flags &= ~QIB_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; spin_unlock(&dev->rdi.pending_lock); spin_unlock_irqrestore(&qp->s_lock, flags); tx = ERR_PTR(-EBUSY); @@ -1030,8 +1030,8 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_flags & QIB_S_WAIT_TX) { - qp->s_flags &= ~QIB_S_WAIT_TX; + if (qp->s_flags & RVT_S_WAIT_TX) { + qp->s_flags &= ~RVT_S_WAIT_TX; qib_schedule_send(qp); } spin_unlock_irqrestore(&qp->s_lock, 
flags); @@ -1081,8 +1081,8 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) for (i = 0; i < n; i++) { qp = qps[i]; spin_lock(&qp->s_lock); - if (qp->s_flags & QIB_S_WAIT_DMA_DESC) { - qp->s_flags &= ~QIB_S_WAIT_DMA_DESC; + if (qp->s_flags & RVT_S_WAIT_DMA_DESC) { + qp->s_flags &= ~RVT_S_WAIT_DMA_DESC; qib_schedule_send(qp); } spin_unlock(&qp->s_lock); @@ -1119,8 +1119,8 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) if (atomic_dec_and_test(&priv->s_dma_busy)) { if (qp->state == IB_QPS_RESET) wake_up(&priv->wait_dma); - else if (qp->s_flags & QIB_S_WAIT_DMA) { - qp->s_flags &= ~QIB_S_WAIT_DMA; + else if (qp->s_flags & RVT_S_WAIT_DMA) { + qp->s_flags &= ~RVT_S_WAIT_DMA; qib_schedule_send(qp); } } @@ -1141,11 +1141,11 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) if (list_empty(&priv->iowait)) { if (list_empty(&dev->memwait)) mod_timer(&dev->mem_timer, jiffies + 1); - qp->s_flags |= QIB_S_WAIT_KMEM; + qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->iowait, &dev->memwait); } spin_unlock(&dev->rdi.pending_lock); - qp->s_flags &= ~QIB_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; ret = -EBUSY; } spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1277,13 +1277,13 @@ static int no_bufs_available(struct rvt_qp *qp) spin_lock(&dev->rdi.pending_lock); if (list_empty(&priv->iowait)) { dev->n_piowait++; - qp->s_flags |= QIB_S_WAIT_PIO; + qp->s_flags |= RVT_S_WAIT_PIO; list_add_tail(&priv->iowait, &dev->piowait); dd = dd_from_dev(dev); dd->f_wantpiobuf_intr(dd, 1); } spin_unlock(&dev->rdi.pending_lock); - qp->s_flags &= ~QIB_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; ret = -EBUSY; } spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1396,7 +1396,7 @@ done: * @len: the length of the packet in bytes * * Return zero if packet is sent or queued OK. - * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise. + * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. */ int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len) @@ -1564,8 +1564,8 @@ full: qp = qps[i]; spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_flags & QIB_S_WAIT_PIO) { - qp->s_flags &= ~QIB_S_WAIT_PIO; + if (qp->s_flags & RVT_S_WAIT_PIO) { + qp->s_flags &= ~RVT_S_WAIT_PIO; qib_schedule_send(qp); } spin_unlock_irqrestore(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index e10ab80..86d7537 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -262,78 +262,6 @@ struct qib_qp_priv { struct rvt_qp *owner; }; -/* - * Atomic bit definitions for r_aflags. - */ -#define QIB_R_WRID_VALID 0 -#define QIB_R_REWIND_SGE 1 - -/* - * Bit definitions for r_flags. - */ -#define QIB_R_REUSE_SGE 0x01 -#define QIB_R_RDMAR_SEQ 0x02 -#define QIB_R_RSP_NAK 0x04 -#define QIB_R_RSP_SEND 0x08 -#define QIB_R_COMM_EST 0x10 - -/* - * Bit definitions for s_flags. 
- * - * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled - * QIB_S_BUSY - send tasklet is processing the QP - * QIB_S_TIMER - the RC retry timer is active - * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics - * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs - * before processing the next SWQE - * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete - * before processing the next SWQE - * QIB_S_WAIT_RNR - waiting for RNR timeout - * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE - * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating - * next send completion entry not via send DMA - * QIB_S_WAIT_PIO - waiting for a send buffer to be available - * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available - * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available - * QIB_S_WAIT_KMEM - waiting for kernel memory to be available - * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue - * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests - * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK - */ -#define QIB_S_SIGNAL_REQ_WR 0x0001 -#define QIB_S_BUSY 0x0002 -#define QIB_S_TIMER 0x0004 -#define QIB_S_RESP_PENDING 0x0008 -#define QIB_S_ACK_PENDING 0x0010 -#define QIB_S_WAIT_FENCE 0x0020 -#define QIB_S_WAIT_RDMAR 0x0040 -#define QIB_S_WAIT_RNR 0x0080 -#define QIB_S_WAIT_SSN_CREDIT 0x0100 -#define QIB_S_WAIT_DMA 0x0200 -#define QIB_S_WAIT_PIO 0x0400 -#define QIB_S_WAIT_TX 0x0800 -#define QIB_S_WAIT_DMA_DESC 0x1000 -#define QIB_S_WAIT_KMEM 0x2000 -#define QIB_S_WAIT_PSN 0x4000 -#define QIB_S_WAIT_ACK 0x8000 -#define QIB_S_SEND_ONE 0x10000 -#define QIB_S_UNLIMITED_CREDIT 0x20000 - -/* - * Wait flags that would prevent any packet type from being sent. - */ -#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \ - QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM) - -/* - * Wait flags that would prevent send work requests from making progress. - */ -#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \ - QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \ - QIB_S_WAIT_PSN | QIB_S_WAIT_ACK) - -#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND) - #define QIB_PSN_CREDIT 16 /* @@ -473,9 +401,9 @@ static inline struct qib_ibdev *to_idev(struct ib_device *ibdev) */ static inline int qib_send_ok(struct rvt_qp *qp) { - return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) && - (qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) || - !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); + return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) && + (qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) || + !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); } /* -- cgit v0.10.2 From 47c7ea6d8e70510c3b3e311cfc20943cd3fe786a Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:56:52 -0800 Subject: IB/qib: Remove create qp and create qp table functionality Rely on rdmavt functions for creation of qp and qp table. Function to allocate a qpn is still being provided by qib as the algorithm to allocate a qpn in qib is different from that of the algorithm in rdmavt. 
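In outline, the QPN algorithm stays in qib and is handed to rdmavt through the
driver_f callback table; the hunks below add roughly the following wiring in
qib_register_ib_device() (a summary sketch of what this patch adds, not extra code):

	/* Driver-provided hooks rdmavt calls back into when creating QPs. */
	dd->verbs_dev.rdi.driver_f.alloc_qpn       = alloc_qpn;	/* qib's own QPN algorithm */
	dd->verbs_dev.rdi.driver_f.qp_priv_alloc   = qp_priv_alloc;
	dd->verbs_dev.rdi.driver_f.qp_priv_free    = qp_priv_free;
	dd->verbs_dev.rdi.driver_f.free_all_qps    = qib_free_all_qps;
	dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;

	/* Prototype the driver now exposes for rdmavt (declared in qib_verbs.h below): */
	int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
		      enum ib_qp_type type, u8 port, gfp_t gfp);
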
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 309b6f3..29cbe67 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1136,8 +1136,9 @@ extern spinlock_t qib_devs_lock; extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; - +extern u16 qpt_mask; extern unsigned qib_cc_table_size; + int qib_init(struct qib_devdata *, int); int init_chip_wc_pat(struct qib_devdata *dd, u32); int qib_enable_wc(struct qib_devdata *dd); diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index c5e9cf5..83dec69 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -48,7 +48,7 @@ * as qpt_mask here instead of adding the mask field to * rvt_qpn_table. */ -static u16 qpt_mask; +u16 qpt_mask; static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off) @@ -128,12 +128,15 @@ static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, * Allocate the next available QPN or * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ -static int alloc_qpn(struct qib_devdata *dd, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp) +int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; + struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata, + verbs_dev); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -296,18 +299,13 @@ static void remove_qp(struct qib_ibdev *dev, struct rvt_qp *qp) /** * qib_free_all_qps - check for QPs still in use - * @qpt: the QP table to empty - * - * There should not be any QPs still in use. - * Free memory for table. 
*/ -unsigned qib_free_all_qps(struct qib_devdata *dd) +unsigned qib_free_all_qps(struct rvt_dev_info *rdi) { - struct qib_ibdev *dev = &dd->verbs_dev; - unsigned long flags; - struct rvt_qp *qp; + struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata, + verbs_dev); unsigned n, qp_inuse = 0; - spinlock_t *qpt_lock_ptr; /* Pointer to make checkpatch happy */ for (n = 0; n < dd->num_pports; n++) { struct qib_ibport *ibp = &dd->pport[n].ibport_data; @@ -321,21 +319,6 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) qp_inuse++; rcu_read_unlock(); } - - spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); - qpt_lock_ptr = &dev->rdi.qp_dev->qpt_lock; - for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) { - qp = rcu_dereference_protected(dev->rdi.qp_dev->qp_table[n], - lockdep_is_held(qpt_lock_ptr)); - RCU_INIT_POINTER(dev->rdi.qp_dev->qp_table[n], NULL); - - for (; qp; qp = rcu_dereference_protected(qp->next, - lockdep_is_held(qpt_lock_ptr))) - qp_inuse++; - } - spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); - synchronize_rcu(); - return qp_inuse; } @@ -374,57 +357,11 @@ struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) return qp; } -/** - * qib_reset_qp - initialize the QP state to the reset state - * @qp: the QP to reset - * @type: the QP type - */ -static void qib_reset_qp(struct rvt_qp *qp, enum ib_qp_type type) +void notify_qp_reset(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; - qp->remote_qpn = 0; - qp->qkey = 0; - qp->qp_access_flags = 0; + atomic_set(&priv->s_dma_busy, 0); - qp->s_flags &= RVT_S_SIGNAL_REQ_WR; - qp->s_hdrwords = 0; - qp->s_wqe = NULL; - qp->s_draining = 0; - qp->s_next_psn = 0; - qp->s_last_psn = 0; - qp->s_sending_psn = 0; - qp->s_sending_hpsn = 0; - qp->s_psn = 0; - qp->r_psn = 0; - qp->r_msn = 0; - if (type == IB_QPT_RC) { - qp->s_state = IB_OPCODE_RC_SEND_LAST; - qp->r_state = IB_OPCODE_RC_SEND_LAST; - } else { - qp->s_state = IB_OPCODE_UC_SEND_LAST; - qp->r_state = IB_OPCODE_UC_SEND_LAST; - } - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - qp->r_nak_state = 0; - qp->r_aflags = 0; - qp->r_flags = 0; - qp->s_head = 0; - qp->s_tail = 0; - qp->s_cur = 0; - qp->s_acked = 0; - qp->s_last = 0; - qp->s_ssn = 1; - qp->s_lsn = 0; - qp->s_mig_state = IB_MIG_MIGRATED; - memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); - qp->r_head_ack_queue = 0; - qp->s_tail_ack_queue = 0; - qp->s_num_rd_atomic = 0; - if (qp->r_rq.wq) { - qp->r_rq.wq->head = 0; - qp->r_rq.wq->tail = 0; - } - qp->r_sge.num_sge = 0; } static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) @@ -733,7 +670,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_lock_irq(&qp->r_lock); spin_lock(&qp->s_lock); clear_mr_refs(qp, 1); - qib_reset_qp(qp, ibqp->qp_type); + rvt_reset_qp(&dev->rdi, qp, ibqp->qp_type); } break; @@ -979,246 +916,33 @@ __be32 qib_compute_aeth(struct rvt_qp *qp) return cpu_to_be32(aeth); } -/** - * qib_create_qp - create a queue pair for a device - * @ibpd: the protection domain who's device we create the queue pair for - * @init_attr: the attributes of the queue pair - * @udata: user data for libibverbs.so - * - * Returns the queue pair on success, otherwise returns an errno. - * - * Called by the ib_create_qp() core verbs function. 
- */ -struct ib_qp *qib_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp) { - struct rvt_qp *qp; - int err; - struct rvt_swqe *swq = NULL; - struct qib_ibdev *dev; - struct qib_devdata *dd; - size_t sz; - size_t sg_list_sz; - struct ib_qp *ret; - gfp_t gfp; struct qib_qp_priv *priv; - if (init_attr->cap.max_send_sge > ib_qib_max_sges || - init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || - init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) - return ERR_PTR(-EINVAL); - - /* GFP_NOIO is applicable in RC QPs only */ - if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && - init_attr->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - - gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? - GFP_NOIO : GFP_KERNEL; - - /* Check receive queue parameters if no SRQ is specified. */ - if (!init_attr->srq) { - if (init_attr->cap.max_recv_sge > ib_qib_max_sges || - init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - if (init_attr->cap.max_send_sge + - init_attr->cap.max_send_wr + - init_attr->cap.max_recv_sge + - init_attr->cap.max_recv_wr == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - } - - switch (init_attr->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - if (init_attr->port_num == 0 || - init_attr->port_num > ibpd->device->phys_port_cnt) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - case IB_QPT_UC: - case IB_QPT_RC: - case IB_QPT_UD: - sz = sizeof(struct rvt_sge) * - init_attr->cap.max_send_sge + - sizeof(struct rvt_swqe); - swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz, - gfp, PAGE_KERNEL); - if (swq == NULL) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - sz = sizeof(*qp); - sg_list_sz = 0; - if (init_attr->srq) { - struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); - - if (srq->rq.max_sge > 1) - sg_list_sz = sizeof(*qp->r_sg_list) * - (srq->rq.max_sge - 1); - } else if (init_attr->cap.max_recv_sge > 1) - sg_list_sz = sizeof(*qp->r_sg_list) * - (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, gfp); - if (!qp) { - ret = ERR_PTR(-ENOMEM); - goto bail_swq; - } - RCU_INIT_POINTER(qp->next, NULL); - priv = kzalloc(sizeof(*priv), gfp); - if (!priv) { - ret = ERR_PTR(-ENOMEM); - goto bail_qp_hdr; - } - priv->owner = qp; - priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp); - if (!priv->s_hdr) { - ret = ERR_PTR(-ENOMEM); - goto bail_qp; - } - qp->priv = priv; - qp->timeout_jiffies = - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL); - if (init_attr->srq) - sz = 0; - else { - qp->r_rq.size = init_attr->cap.max_recv_wr + 1; - qp->r_rq.max_sge = init_attr->cap.max_recv_sge; - sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + - sizeof(struct rvt_rwqe); - if (gfp != GFP_NOIO) - qp->r_rq.wq = vmalloc_user( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); - else - qp->r_rq.wq = __vmalloc( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz, - gfp, PAGE_KERNEL); - - if (!qp->r_rq.wq) { - ret = ERR_PTR(-ENOMEM); - goto bail_qp; - } - } + priv = kzalloc(sizeof(*priv), gfp); + if (!priv) + return ERR_PTR(-ENOMEM); + priv->owner = qp; - /* - * ib_create_qp() will initialize qp->ibqp - * except for qp->ibqp.qp_num. 
- */ - spin_lock_init(&qp->r_lock); - spin_lock_init(&qp->s_lock); - spin_lock_init(&qp->r_rq.lock); - atomic_set(&qp->refcount, 0); - init_waitqueue_head(&qp->wait); - init_waitqueue_head(&priv->wait_dma); - init_timer(&qp->s_timer); - qp->s_timer.data = (unsigned long)qp; - INIT_WORK(&priv->s_work, qib_do_send); - INIT_LIST_HEAD(&priv->iowait); - INIT_LIST_HEAD(&qp->rspwait); - qp->state = IB_QPS_RESET; - qp->s_wq = swq; - qp->s_size = init_attr->cap.max_send_wr + 1; - qp->s_max_sge = init_attr->cap.max_send_sge; - if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = RVT_S_SIGNAL_REQ_WR; - dev = to_idev(ibpd->device); - dd = dd_from_dev(dev); - err = alloc_qpn(dd, &dev->rdi.qp_dev->qpn_table, - init_attr->qp_type, init_attr->port_num, gfp); - if (err < 0) { - ret = ERR_PTR(err); - vfree(qp->r_rq.wq); - goto bail_qp; - } - qp->ibqp.qp_num = err; - qp->port_num = init_attr->port_num; - qib_reset_qp(qp, init_attr->qp_type); - break; - - default: - /* Don't support raw QPs */ - ret = ERR_PTR(-ENOSYS); - goto bail; - } - - init_attr->cap.max_inline_data = 0; - - /* - * Return the address of the RWQ as the offset to mmap. - * See qib_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - if (!qp->r_rq.wq) { - __u64 offset = 0; - - err = ib_copy_to_udata(udata, &offset, - sizeof(offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else { - u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; - - qp->ip = rvt_create_mmap_info(&dev->rdi, s, - ibpd->uobject->context, - qp->r_rq.wq); - if (!qp->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - err = ib_copy_to_udata(udata, &(qp->ip->offset), - sizeof(qp->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } + priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp); + if (!priv->s_hdr) { + kfree(priv); + return ERR_PTR(-ENOMEM); } + init_waitqueue_head(&priv->wait_dma); + INIT_WORK(&priv->s_work, qib_do_send); + INIT_LIST_HEAD(&priv->iowait); - spin_lock(&dev->n_qps_lock); - if (dev->n_qps_allocated == ib_qib_max_qps) { - spin_unlock(&dev->n_qps_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_qps_allocated++; - spin_unlock(&dev->n_qps_lock); - - if (qp->ip) { - spin_lock_irq(&dev->rdi.pending_lock); - list_add(&qp->ip->pending_mmaps, &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } + return priv; +} - ret = &qp->ibqp; - goto bail; +void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct qib_qp_priv *priv = qp->priv; -bail_ip: - if (qp->ip) - kref_put(&qp->ip->ref, rvt_release_mmap_info); - else - vfree(qp->r_rq.wq); - free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); -bail_qp: kfree(priv->s_hdr); kfree(priv); -bail_qp_hdr: - kfree(qp); -bail_swq: - vfree(swq); -bail: - return ret; } /** @@ -1277,31 +1001,6 @@ int qib_destroy_qp(struct ib_qp *ibqp) } /** - * qib_init_qpn_table - initialize the QP number table for a device - * @qpt: the QPN table - */ -void qib_init_qpn_table(struct qib_devdata *dd, struct rvt_qpn_table *qpt) -{ - spin_lock_init(&qpt->lock); - qpt->last = 1; /* start with QPN 2 */ - qpt->nmaps = 1; - qpt_mask = dd->qpn_mask; -} - -/** - * qib_free_qpn_table - free the QP number table for a device - * @qpt: the QPN table - */ -void qib_free_qpn_table(struct rvt_qpn_table *qpt) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(qpt->map); i++) - if (qpt->map[i].page) - free_page((unsigned long) qpt->map[i].page); -} - -/** * qib_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to 
flush * @aeth: the Acknowledge Extended Transport Header diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index c190534..6b85153 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1904,29 +1904,11 @@ int qib_register_ib_device(struct qib_devdata *dd) unsigned i, ctxt; int ret; - /* allocate parent object */ - dev->rdi.qp_dev = kzalloc(sizeof(*dev->rdi.qp_dev), GFP_KERNEL); - if (!dev->rdi.qp_dev) - return -ENOMEM; - dev->rdi.qp_dev->qp_table_size = ib_qib_qp_table_size; - dev->rdi.qp_dev->qp_table_bits = ilog2(ib_qib_qp_table_size); get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); - dev->rdi.qp_dev->qp_table = kmalloc_array( - dev->rdi.qp_dev->qp_table_size, - sizeof(*dev->rdi.qp_dev->qp_table), - GFP_KERNEL); - if (!dev->rdi.qp_dev->qp_table) { - ret = -ENOMEM; - goto err_qpt; - } - for (i = 0; i < dev->rdi.qp_dev->qp_table_size; i++) - RCU_INIT_POINTER(dev->rdi.qp_dev->qp_table[i], NULL); - for (i = 0; i < dd->num_pports; i++) init_ibport(ppd + i); /* Only need to initialize non-zero fields. */ - spin_lock_init(&dev->rdi.qp_dev->qpt_lock); spin_lock_init(&dev->n_cqs_lock); spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); @@ -1935,7 +1917,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; - qib_init_qpn_table(dd, &dev->rdi.qp_dev->qpn_table); + qpt_mask = dd->qpn_mask; INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); @@ -2032,7 +2014,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->modify_srq = qib_modify_srq; ibdev->query_srq = qib_query_srq; ibdev->destroy_srq = qib_destroy_srq; - ibdev->create_qp = qib_create_qp; + ibdev->create_qp = NULL; ibdev->modify_qp = qib_modify_qp; ibdev->query_qp = qib_query_qp; ibdev->destroy_qp = qib_destroy_qp; @@ -2071,9 +2053,21 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; - dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | - RVT_FLAG_CQ_INIT_DRIVER); + dd->verbs_dev.rdi.driver_f.alloc_qpn = alloc_qpn; + dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; + dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; + dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps; + dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; + + dd->verbs_dev.rdi.flags = RVT_FLAG_CQ_INIT_DRIVER; + dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; + dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size; + dd->verbs_dev.rdi.dparms.qpn_start = 1; + dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP; + dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */ + dd->verbs_dev.rdi.dparms.qpn_inc = 1; + dd->verbs_dev.rdi.dparms.qos_shift = 1; dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd); @@ -2122,8 +2116,6 @@ err_tx: sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); err_hdrs: - kfree(dev->rdi.qp_dev->qp_table); -err_qpt: qib_dev_err(dd, "cannot register verbs: %d!\n", -ret); bail: return ret; @@ -2132,7 +2124,6 @@ bail: void qib_unregister_ib_device(struct qib_devdata *dd) { struct qib_ibdev *dev = &dd->verbs_dev; - u32 qps_inuse; qib_verbs_unregister_sysfs(dd); @@ -2149,13 +2140,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) if 
(!list_empty(&dev->memwait)) qib_dev_err(dd, "memwait list not empty!\n"); - qps_inuse = qib_free_all_qps(dd); - if (qps_inuse) - qib_dev_err(dd, "QP memory leak! %u still in use\n", - qps_inuse); - del_timer_sync(&dev->mem_timer); - qib_free_qpn_table(&dev->rdi.qp_dev->qpn_table); while (!list_empty(&dev->txreq_free)) { struct list_head *l = dev->txreq_free.next; struct qib_verbs_txreq *tx; @@ -2169,7 +2154,6 @@ void qib_unregister_ib_device(struct qib_devdata *dd) dd->pport->sdma_descq_cnt * sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); - kfree(dev->rdi.qp_dev->qp_table); } /* diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 86d7537..bcc6271 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -464,10 +464,6 @@ __be32 qib_compute_aeth(struct rvt_qp *qp); struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn); -struct ib_qp *qib_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - int qib_destroy_qp(struct ib_qp *ibqp); int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err); @@ -477,12 +473,15 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); - -unsigned qib_free_all_qps(struct qib_devdata *dd); - -void qib_init_qpn_table(struct qib_devdata *dd, struct rvt_qpn_table *qpt); - -void qib_free_qpn_table(struct rvt_qpn_table *qpt); +/* + * Functions provided by qib driver for rdmavt to use + */ +unsigned qib_free_all_qps(struct rvt_dev_info *rdi); +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); +void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); +void notify_qp_reset(struct rvt_qp *qp); +int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port, gfp_t gfp); #ifdef CONFIG_DEBUG_FS -- cgit v0.10.2 From 6f6387ae75c1499b315ddcd3c74402d44423e1cc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:15 -0800 Subject: IB/rdmavt: Add completion queue functions Brings in completion queue functionality. A kthread worker is added to the rvt_dev_info to serve as a worker for completion queues. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 8d96194..7308a27 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -45,7 +45,126 @@ * */ +#include +#include +#include #include "cq.h" +#include "vt.h" + +/** + * rvt_cq_enter - add a new entry to the completion queue + * @cq: completion queue + * @entry: work completion entry to add + * @sig: true if @entry is solicited + * + * This may be called with qp->s_lock held. + */ +void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) +{ + struct rvt_cq_wc *wc; + unsigned long flags; + u32 head; + u32 next; + + spin_lock_irqsave(&cq->lock, flags); + + /* + * Note that the head pointer might be writable by user processes. + * Take care to verify it is a sane value. 
+ */ + wc = cq->queue; + head = wc->head; + if (head >= (unsigned)cq->ibcq.cqe) { + head = cq->ibcq.cqe; + next = 0; + } else { + next = head + 1; + } + + if (unlikely(next == wc->tail)) { + spin_unlock_irqrestore(&cq->lock, flags); + if (cq->ibcq.event_handler) { + struct ib_event ev; + + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + return; + } + if (cq->ip) { + wc->uqueue[head].wr_id = entry->wr_id; + wc->uqueue[head].status = entry->status; + wc->uqueue[head].opcode = entry->opcode; + wc->uqueue[head].vendor_err = entry->vendor_err; + wc->uqueue[head].byte_len = entry->byte_len; + wc->uqueue[head].ex.imm_data = + (__u32 __force)entry->ex.imm_data; + wc->uqueue[head].qp_num = entry->qp->qp_num; + wc->uqueue[head].src_qp = entry->src_qp; + wc->uqueue[head].wc_flags = entry->wc_flags; + wc->uqueue[head].pkey_index = entry->pkey_index; + wc->uqueue[head].slid = entry->slid; + wc->uqueue[head].sl = entry->sl; + wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; + wc->uqueue[head].port_num = entry->port_num; + /* Make sure entry is written before the head index. */ + smp_wmb(); + } else { + wc->kqueue[head] = *entry; + } + wc->head = next; + + if (cq->notify == IB_CQ_NEXT_COMP || + (cq->notify == IB_CQ_SOLICITED && + (solicited || entry->status != IB_WC_SUCCESS))) { + struct kthread_worker *worker; + /* + * This will cause send_complete() to be called in + * another thread. + */ + smp_read_barrier_depends(); /* see rvt_cq_exit */ + worker = cq->rdi->worker; + if (likely(worker)) { + cq->notify = RVT_CQ_NONE; + cq->triggered++; + queue_kthread_work(worker, &cq->comptask); + } + } + + spin_unlock_irqrestore(&cq->lock, flags); +} +EXPORT_SYMBOL(rvt_cq_enter); + +static void send_complete(struct kthread_work *work) +{ + struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask); + + /* + * The completion handler will most likely rearm the notification + * and poll for all pending entries. If a new completion entry + * is added while we are in this routine, queue_work() + * won't call us again until we return so we check triggered to + * see if we need to call the handler again. + */ + for (;;) { + u8 triggered = cq->triggered; + + /* + * IPoIB connected mode assumes the callback is from a + * soft IRQ. We simulate this by blocking "bottom halves". + * See the implementation for ipoib_cm_handle_tx_wc(), + * netif_tx_lock_bh() and netif_tx_lock(). + */ + local_bh_disable(); + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + local_bh_enable(); + + if (cq->triggered == triggered) + return; + } +} /** * rvt_create_cq - create a completion queue @@ -64,7 +183,103 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct rvt_cq *cq; + struct rvt_cq_wc *wc; + struct ib_cq *ret; + u32 sz; + unsigned int entries = attr->cqe; + + if (attr->flags) + return ERR_PTR(-EINVAL); + + if (entries < 1 || entries > rdi->dparms.props.max_cqe) + return ERR_PTR(-EINVAL); + + /* Allocate the completion queue structure. */ + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + /* + * Allocate the completion queue entries and head/tail pointers. + * This is allocated separately so that it can be resized and + * also mapped into user space. + * We need to use vmalloc() in order to support mmap and large + * numbers of entries. 
+ */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (entries + 1); + else + sz += sizeof(struct ib_wc) * (entries + 1); + wc = vmalloc_user(sz); + if (!wc) { + ret = ERR_PTR(-ENOMEM); + goto bail_cq; + } + + /* + * Return the address of the WC as the offset to mmap. + * See rvt_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + + cq->ip = rvt_create_mmap_info(rdi, sz, context, wc); + if (!cq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + + err = ib_copy_to_udata(udata, &cq->ip->offset, + sizeof(cq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + + spin_lock(&rdi->n_cqs_lock); + if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { + spin_unlock(&rdi->n_cqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + rdi->n_cqs_allocated++; + spin_unlock(&rdi->n_cqs_lock); + + if (cq->ip) { + spin_lock_irq(&rdi->pending_lock); + list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + } + + /* + * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. + * The number of entries should be >= the number requested or return + * an error. + */ + cq->rdi = rdi; + cq->ibcq.cqe = entries; + cq->notify = RVT_CQ_NONE; + spin_lock_init(&cq->lock); + init_kthread_work(&cq->comptask, send_complete); + cq->queue = wc; + + ret = &cq->ibcq; + + goto done; + +bail_ip: + kfree(cq->ip); +bail_wc: + vfree(wc); +bail_cq: + kfree(cq); +done: + return ret; } /** @@ -77,12 +292,53 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, */ int rvt_destroy_cq(struct ib_cq *ibcq) { - return -EOPNOTSUPP; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + struct rvt_dev_info *rdi = cq->rdi; + + flush_kthread_work(&cq->comptask); + spin_lock(&rdi->n_cqs_lock); + rdi->n_cqs_allocated--; + spin_unlock(&rdi->n_cqs_lock); + if (cq->ip) + kref_put(&cq->ip->ref, rvt_release_mmap_info); + else + vfree(cq->queue); + kfree(cq); + + return 0; } +/** + * rvt_req_notify_cq - change the notification type for a completion queue + * @ibcq: the completion queue + * @notify_flags: the type of notification to request + * + * Returns 0 for success. + * + * This may be called from interrupt context. Also called by + * ib_req_notify_cq() in the generic verbs code. + */ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { - return -EOPNOTSUPP; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, flags); + /* + * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow + * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). + */ + if (cq->notify != IB_CQ_NEXT_COMP) + cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; + + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + cq->queue->head != cq->queue->tail) + ret = 1; + + spin_unlock_irqrestore(&cq->lock, flags); + + return ret; } /** @@ -93,7 +349,107 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) */ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { - return -EOPNOTSUPP; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + struct rvt_cq_wc *old_wc; + struct rvt_cq_wc *wc; + u32 head, tail, n; + int ret; + u32 sz; + struct rvt_dev_info *rdi = cq->rdi; + + if (cqe < 1 || cqe > rdi->dparms.props.max_cqe) + return -EINVAL; + + /* + * Need to use vmalloc() if we want to support large #s of entries. 
+ */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); + else + sz += sizeof(struct ib_wc) * (cqe + 1); + wc = vmalloc_user(sz); + if (!wc) + return -ENOMEM; + + /* Check that we can write the offset to mmap. */ + if (udata && udata->outlen >= sizeof(__u64)) { + __u64 offset = 0; + + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&cq->lock); + /* + * Make sure head and tail are sane since they + * might be user writable. + */ + old_wc = cq->queue; + head = old_wc->head; + if (head > (u32)cq->ibcq.cqe) + head = (u32)cq->ibcq.cqe; + tail = old_wc->tail; + if (tail > (u32)cq->ibcq.cqe) + tail = (u32)cq->ibcq.cqe; + if (head < tail) + n = cq->ibcq.cqe + 1 + head - tail; + else + n = head - tail; + if (unlikely((u32)cqe < n)) { + ret = -EINVAL; + goto bail_unlock; + } + for (n = 0; tail != head; n++) { + if (cq->ip) + wc->uqueue[n] = old_wc->uqueue[tail]; + else + wc->kqueue[n] = old_wc->kqueue[tail]; + if (tail == (u32)cq->ibcq.cqe) + tail = 0; + else + tail++; + } + cq->ibcq.cqe = cqe; + wc->head = n; + wc->tail = 0; + cq->queue = wc; + spin_unlock_irq(&cq->lock); + + vfree(old_wc); + + if (cq->ip) { + struct rvt_mmap_info *ip = cq->ip; + + rvt_update_mmap_info(rdi, ip, sz, wc); + + /* + * Return the offset to mmap. + * See rvt_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + + spin_lock_irq(&rdi->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, &rdi->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + } + + return 0; + +bail_unlock: + spin_unlock_irq(&cq->lock); +bail_free: + vfree(wc); +bail: + return ret; + } /** @@ -109,5 +465,84 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) */ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { - return -EOPNOTSUPP; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + struct rvt_cq_wc *wc; + unsigned long flags; + int npolled; + u32 tail; + + /* The kernel can only poll a kernel completion queue */ + if (cq->ip) + return -EINVAL; + + spin_lock_irqsave(&cq->lock, flags); + + wc = cq->queue; + tail = wc->tail; + if (tail > (u32)cq->ibcq.cqe) + tail = (u32)cq->ibcq.cqe; + for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { + if (tail == wc->head) + break; + /* The kernel doesn't need a RMB since it has the lock. 
*/ + *entry = wc->kqueue[tail]; + if (tail >= cq->ibcq.cqe) + tail = 0; + else + tail++; + } + wc->tail = tail; + + spin_unlock_irqrestore(&cq->lock, flags); + + return npolled; +} + +int rvt_driver_cq_init(struct rvt_dev_info *rdi) +{ + int ret = 0; + int cpu; + struct task_struct *task; + + if (rdi->flags & RVT_FLAG_CQ_INIT_DRIVER) { + rvt_pr_info(rdi, "Driver is doing CQ init.\n"); + return 0; + } + + if (rdi->worker) + return 0; + rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); + if (!rdi->worker) + return -ENOMEM; + init_kthread_worker(rdi->worker); + task = kthread_create_on_node( + kthread_worker_fn, + rdi->worker, + rdi->dparms.node, + "%s", rdi->dparms.cq_name); + if (IS_ERR(task)) { + kfree(rdi->worker); + rdi->worker = NULL; + return PTR_ERR(task); + } + + cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); + kthread_bind(task, cpu); + wake_up_process(task); + return ret; +} + +void rvt_cq_exit(struct rvt_dev_info *rdi) +{ + struct kthread_worker *worker; + + worker = rdi->worker; + if (!worker) + return; + /* blocks future queuing from send_complete() */ + rdi->worker = NULL; + smp_wmb(); /* See rdi_cq_enter */ + flush_kthread_worker(worker); + kthread_stop(worker->task); + kfree(worker); } diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 63a517d..3813d90 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -49,6 +49,7 @@ */ #include +#include struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, @@ -58,5 +59,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); - +int rvt_driver_cq_init(struct rvt_dev_info *rdi); +void rvt_cq_exit(struct rvt_dev_info *rdi); #endif /* DEF_RVTCQ_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index f2b6438..136cc21 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -324,6 +324,11 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, mmap); /* Completion queues */ + ret = rvt_driver_cq_init(rdi); + if (ret) { + pr_err("Error in driver CQ init.\n"); + goto bail_mr; + } CHECK_DRIVER_OVERRIDE(rdi, create_cq); CHECK_DRIVER_OVERRIDE(rdi, destroy_cq); CHECK_DRIVER_OVERRIDE(rdi, poll_cq); @@ -344,12 +349,15 @@ int rvt_register_device(struct rvt_dev_info *rdi) ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); - goto bail_mr; + goto bail_cq; } rvt_pr_info(rdi, "Registration with rdmavt done.\n"); return ret; +bail_cq: + rvt_cq_exit(rdi); + bail_mr: rvt_mr_exit(rdi); @@ -366,6 +374,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) return; ib_unregister_device(&rdi->ibdev); + rvt_cq_exit(rdi); rvt_mr_exit(rdi); rvt_qp_exit(rdi); } diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 70a9596..79080e3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -138,6 +138,8 @@ struct rvt_ibport { /* TODO: Move sm_ah and smi_ah into here as well*/ }; +#define RVT_CQN_MAX 16 /* maximum length of cq name */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -190,6 +192,8 @@ struct rvt_driver_params { int nports; int npkeys; u8 qos_shift; + char cq_name[RVT_CQN_MAX]; + int 
node; }; /* Protection domain */ @@ -281,6 +285,11 @@ struct rvt_dev_info { spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; spinlock_t pending_lock; /* protect pending mmap list */ + + /* CQ */ + struct kthread_worker *worker; /* per device cq worker */ + u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; /* protect count of in use cqs */ }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h new file mode 100644 index 0000000..51fd00b --- /dev/null +++ b/include/rdma/rdmavt_cq.h @@ -0,0 +1,99 @@ +#ifndef DEF_RDMAVT_INCCQ_H +#define DEF_RDMAVT_INCCQ_H + +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +/* + * Define an ib_cq_notify value that is not valid so we know when CQ + * notifications are armed. + */ +#define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1) + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. 
+ */ +struct rvt_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + union { + /* these are actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc uqueue[0]; + struct ib_wc kqueue[0]; + }; +}; + +/* + * The completion queue structure. + */ +struct rvt_cq { + struct ib_cq ibcq; + struct kthread_work comptask; + spinlock_t lock; /* protect changes in this struct */ + u8 notify; + u8 triggered; + struct rvt_dev_info *rdi; + struct rvt_cq_wc *queue; + struct rvt_mmap_info *ip; +}; + +static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct rvt_cq, ibcq); +} + +void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited); + +#endif /* DEF_RDMAVT_INCCQH */ -- cgit v0.10.2 From bfbac097b6e8023e10fdadab2527d0a1a3160d7e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:22 -0800 Subject: IB/rdmavt: Add post send to rdmavt Add in a post_send and post_one_send to rdmavt. The ULP will provide a WQE to rdmavt which will then walk and queue each element. Rdmavt will then queue the work to be done in the driver or kick the driver's progress routine. There needs to be a follow on patch which adds in another lock for the head of the queue so that it can be added to and read from in parallel. This will touch protocol handlers and require other changes in the drivers. This will be done separately. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 43346a7..bd2d91a 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -53,6 +53,27 @@ #include "qp.h" #include "vt.h" +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; rvt_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ +const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = 0, + [IB_QPS_INIT] = RVT_POST_RECV_OK, + [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK, + [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK | + RVT_PROCESS_NEXT_SEND_OK, + [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK, + [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_FLUSH_SEND, + [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV | + RVT_POST_SEND_OK | RVT_FLUSH_SEND, +}; +EXPORT_SYMBOL(ib_rvt_state_ops); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) @@ -586,7 +607,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, /* * Return the address of the RWQ as the offset to mmap. - * See hfi1_mmap() for details. + * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { @@ -750,6 +771,118 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, } /** + * rvt_post_one_wr - post one RC, UC, or UD send work request + * @qp: the QP to post on + * @wr: the work request to send + */ +static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +{ + struct rvt_swqe *wqe; + u32 next; + int i; + int j; + int acc; + struct rvt_lkey_table *rkt; + struct rvt_pd *pd; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + /* IB spec says that num_sge == 0 is OK. 
*/ + if (unlikely(wr->num_sge > qp->s_max_sge)) + return -EINVAL; + + /* + * Don't allow RDMA reads or atomic operations on UC or + * undefined operations. + * Make sure buffer is large enough to hold the result for atomics. + */ + if (qp->ibqp.qp_type == IB_QPT_UC) { + if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) + return -EINVAL; + } else if (qp->ibqp.qp_type != IB_QPT_RC) { + /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) + return -EINVAL; + /* Check UD destination address PD */ + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) + return -EINVAL; + } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { + return -EINVAL; + } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) { + return -EINVAL; + } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { + return -EINVAL; + } + + next = qp->s_head + 1; + if (next >= qp->s_size) + next = 0; + if (next == qp->s_last) + return -ENOMEM; + + rkt = &rdi->lkey_table; + pd = ibpd_to_rvtpd(qp->ibqp.pd); + wqe = rvt_get_swqe_ptr(qp, qp->s_head); + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + + wqe->length = 0; + j = 0; + if (wr->num_sge) { + acc = wr->opcode >= IB_WR_RDMA_READ ? + IB_ACCESS_LOCAL_WRITE : 0; + for (i = 0; i < wr->num_sge; i++) { + u32 length = wr->sg_list[i].length; + int ok; + + if (length == 0) + continue; + ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], + &wr->sg_list[i], acc); + if (!ok) + goto bail_inval_free; + wqe->length += length; + j++; + } + wqe->wr.num_sge = j; + } + if (qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_RC) { + if (wqe->length > 0x80000000U) + goto bail_inval_free; + } else { + atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + } + wqe->ssn = qp->s_ssn++; + qp->s_head = next; + + return 0; + +bail_inval_free: + /* release mr holds */ + while (j) { + struct rvt_sge *sge = &wqe->sg_list[--j]; + + rvt_put_mr(sge->mr); + } + return -EINVAL; +} + +/** * rvt_post_send - post a send on a QP * @ibqp: the QP to post the send on * @wr: the list of work requests to post @@ -760,20 +893,46 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + unsigned long flags = 0; + int call_send; + unsigned nreq = 0; + int err = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + /* - * VT-DRIVER-API: do_send() - * Driver needs to have a do_send() call which is a single entry point - * to take an already formed packet and throw it out on the wire. Once - * the packet is sent the driver needs to make an upcall to rvt so the - * completion queue can be notified and/or any other outstanding - * work/book keeping can be finished. - * - * Note that there should also be a way for rvt to protect itself - * against hangs in the driver layer. 
If a send doesn't actually - * complete in a timely manor rvt needs to return an error event. + * Ensure QP state is such that we can send. If not bail out early, + * there is no need to do this every time we post a send. */ + if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { + spin_unlock_irqrestore(&qp->s_lock, flags); + return -EINVAL; + } - return -EOPNOTSUPP; + /* + * If the send queue is empty, and we only have a single WR then just go + * ahead and kick the send engine into gear. Otherwise we will always + * just schedule the send to happen later. + */ + call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; + + for (; wr; wr = wr->next) { + err = rvt_post_one_wr(qp, wr); + if (unlikely(err)) { + *bad_wr = wr; + goto bail; + } + nreq++; + } +bail: + if (nreq && !call_send) + rdi->driver_f.schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + if (nreq && call_send) + rdi->driver_f.do_send(qp); + return err; } /** diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79080e3..36e4fb4c 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -231,6 +231,8 @@ struct rvt_driver_provided { gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); + void (*schedule_send)(struct rvt_qp *qp); + void (*do_send)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -312,6 +314,11 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct rvt_srq, ibsrq); } +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index bce0a03..3189f19 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -129,6 +129,17 @@ /* Number of bits to pay attention to in the opcode for checking qp type */ #define RVT_OPCODE_QP_MASK 0xE0 +/* Flags for checking QP state (see ib_rvt_state_ops[]) */ +#define RVT_POST_SEND_OK 0x01 +#define RVT_POST_RECV_OK 0x02 +#define RVT_PROCESS_RECV_OK 0x04 +#define RVT_PROCESS_SEND_OK 0x08 +#define RVT_PROCESS_NEXT_SEND_OK 0x10 +#define RVT_FLUSH_SEND 0x20 +#define RVT_FLUSH_RECV 0x40 +#define RVT_PROCESS_OR_FLUSH_SEND \ + (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored @@ -373,4 +384,19 @@ struct rvt_qp_ibdev { struct rvt_qpn_table qpn_table; }; +/* + * Since struct rvt_swqe is not a fixed size, we can't simply index into + * struct hfi1_qp.s_wq. This function does the array index computation. + */ +static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, + unsigned n) +{ + return (struct rvt_swqe *)((char *)qp->s_wq + + (sizeof(struct rvt_swqe) + + qp->s_max_sge * + sizeof(struct rvt_sge)) * n); +} + +extern const int ib_rvt_state_ops[]; + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v0.10.2 From 81ba39a82e8a9ed4eba3e967a09e60a9d00d8de4 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:28 -0800 Subject: IB/rdmavt: Add support for tracing events This patch adds support of tracing events using the kernels built-in event tracing infrastructure. This can be extended to provide a wide range of trace and debug capabilities which have a negligible impact on performance when enabled. These should be preferred over the use of the rvt_pr* functions. 
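As a rough illustration of how the new tracepoint is meant to be used (the
actual call site is added in the vt.c hunk below), rdmavt code holding a
struct rvt_dev_info pointer can emit the rvt_dbg event, and the event can
then be enabled at run time through the usual ftrace interface:

	/* sketch of a call site; trace_rvt_dbg() is generated from trace.h */
	trace_rvt_dbg(rdi, "Driver attempting registration");

	# echo 1 > /sys/kernel/debug/tracing/events/rdmavt/rvt_dbg/enable
	# cat /sys/kernel/debug/tracing/trace_pipe

The tracefs path above assumes the conventional debugfs mount point and may
differ on a given system.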
Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 00f0188..ccaa799 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,7 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o +rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \ + trace.o + +CFLAGS_trace.o = -I$(src) diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c new file mode 100644 index 0000000..19afe39 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace.c @@ -0,0 +1,49 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#define CREATE_TRACE_POINTS +#include "trace.h" diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h new file mode 100644 index 0000000..22e86ff --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -0,0 +1,86 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. 
+ * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#undef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR rdmavt + +#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RDMAVT_TRACE_H + +#include +#include + +#include + +#define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) +#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi)) + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rdmavt + +TRACE_EVENT(rvt_dbg, + TP_PROTO(struct rvt_dev_info *rdi, + const char *msg), + TP_ARGS(rdi, msg), + TP_STRUCT__entry( + RDI_DEV_ENTRY(rdi) + __string(msg, msg) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(rdi); + __assign_str(msg, msg); + ), + TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) +); + +#endif /* __RDMAVT_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . 
+#define TRACE_INCLUDE_FILE trace +#include diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 136cc21..7f56a42 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -48,6 +48,7 @@ #include #include #include "vt.h" +#include "trace.h" MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("RDMA Verbs Transport Library"); @@ -259,7 +260,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) return -EINVAL; } - /* Once we get past here we can use the rvt_pr macros */ + /* Once we get past here we can use rvt_pr macros and tracepoints */ + trace_rvt_dbg(rdi, "Driver attempting registration"); rvt_mmap_init(rdi); /* Dev Ops */ @@ -370,6 +372,7 @@ EXPORT_SYMBOL(rvt_register_device); void rvt_unregister_device(struct rvt_dev_info *rdi) { + trace_rvt_dbg(rdi, "Driver is unregistering."); if (!rdi) return; -- cgit v0.10.2 From 3b0b3fb3c1bbf50a2f88ea7345448a41dcba3c57 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:35 -0800 Subject: IB/rdmavt: Add modify qp Add modify qp and supporting functions. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bd2d91a..9442126 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -45,6 +45,7 @@ * */ +#include #include #include #include @@ -52,6 +53,7 @@ #include #include "qp.h" #include "vt.h" +#include "trace.h" /* * Note that it is OK to post send work requests in the SQE and ERR @@ -380,19 +382,47 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) * reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type + * r and s lock are required to be held by the caller */ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) { - qp->remote_qpn = 0; - qp->qkey = 0; - qp->qp_access_flags = 0; + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + + /* Let drivers flush their waitlist */ + rdi->driver_f.flush_qp_waiters(qp); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + /* Stop the send queue and the retry timer */ + rdi->driver_f.stop_send_queue(qp); + del_timer_sync(&qp->s_timer); + + /* Wait for things to stop */ + rdi->driver_f.quiesce_qp(qp); + + /* take qp out the hash and wait for it to be unused */ + rvt_remove_qp(rdi, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + /* grab the lock b/c it was locked at call time */ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + + rvt_clear_mr_refs(qp, 1); + } /* - * Let driver do anything it needs to for a new/reset qp + * Let the driver do any tear down it needs to for a qp + * that has been reset */ rdi->driver_f.notify_qp_reset(qp); + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; qp->s_flags &= RVT_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; @@ -702,6 +732,208 @@ bail_swq: return ret; } +void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) +{ + unsigned n; + + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) + rvt_put_ss(&qp->s_rdma_read_sge); + + rvt_put_ss(&qp->r_sge); + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct rvt_sge *sge = &wqe->sg_list[i]; + + rvt_put_mr(sge->mr); + } + if 
(qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&ibah_to_rvtah( + wqe->ud_wr.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + if (qp->s_rdma_mr) { + rvt_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct rvt_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + } +} +EXPORT_SYMBOL(rvt_clear_mr_refs); + +/** + * rvt_error_qp - put a QP into the error state + * @qp: the QP to put into the error state + * @err: the receive completion error to signal if a RWQE is active + * + * Flushes both send and receive work queues. + * Returns true if last WQE event should be generated. + * The QP r_lock and s_lock should be held and interrupts disabled. + * If we are already in error state, just return. + */ +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) +{ + struct ib_wc wc; + int ret = 0; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) + goto bail; + + qp->state = IB_QPS_ERR; + + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + if (qp->s_flags & RVT_S_ANY_WAIT_SEND) + qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; + + rdi->driver_f.notify_error_qp(qp); + + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + rdi->driver_f.schedule_send(qp); + + rvt_clear_mr_refs(qp, 0); + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { + wc.wr_id = qp->r_wr_id; + wc.status = err; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; + + if (qp->r_rq.wq) { + struct rvt_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + while (tail != head) { + wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); + } else if (qp->ibqp.event_handler) { + ret = 1; + } + +bail: + return ret; +} +EXPORT_SYMBOL(rvt_error_qp); + +/* + * Put the QP into the hash table. + * The hash table holds a reference to the QP. + */ +static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + unsigned long flags; + + atomic_inc(&qp->refcount); + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (qp->ibqp.qp_num <= 1) { + rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp); + } else { + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + + qp->next = rdi->qp_dev->qp_table[n]; + rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp); + trace_rvt_qpinsert(qp, n); + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); +} + +/* + * Remove the QP from the table so it can't be found asynchronously by + * the receive routine. 
+ */ +void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + unsigned long flags; + int removed = 1; + + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (rcu_dereference_protected(rvp->qp[0], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[0], NULL); + } else if (rcu_dereference_protected(rvp->qp[1], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[1], NULL); + } else { + struct rvt_qp *q; + struct rvt_qp __rcu **qpp; + + removed = 0; + qpp = &rdi->qp_dev->qp_table[n]; + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; + qpp = &q->next) { + if (q == qp) { + RCU_INIT_POINTER(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&rdi->qp_dev->qpt_lock))); + removed = 1; + trace_rvt_qpremove(qp, n); + break; + } + } + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); + if (removed) { + synchronize_rcu(); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} +EXPORT_SYMBOL(rvt_remove_qp); + /** * qib_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying @@ -714,13 +946,248 @@ bail_swq: int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + enum ib_qp_state cur_state, new_state; + struct ib_event ev; + int lastwqe = 0; + int mig = 0; + int pmtu = 0; /* for gcc warning only */ + enum rdma_link_layer link; + + link = rdma_port_get_link_layer(ibqp->device, qp->port_num); + + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask, link)) + goto inval; + + if (attr_mask & IB_QP_AV) { + if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) + goto inval; + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_ah_attr.dlid >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) + goto inval; + if (attr->alt_pkey_index >= rvt_get_npkeys(rdi)) + goto inval; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + if (attr->pkey_index >= rvt_get_npkeys(rdi)) + goto inval; + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + if (attr->min_rnr_timer > 31) + goto inval; + + if (attr_mask & IB_QP_PORT) + if (qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI || + attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_DEST_QPN) + if (attr->dest_qp_num > RVT_QPN_MASK) + goto inval; + + if (attr_mask & IB_QP_RETRY_CNT) + if (attr->retry_cnt > 7) + goto inval; + + if (attr_mask & IB_QP_RNR_RETRY) + if (attr->rnr_retry > 7) + goto inval; + /* - * VT-DRIVER-API: qp_mtu() - * OPA devices have a per VL MTU the driver has a mapping of IB SL to SC - * to VL and the mapping table of MTUs per VL. This is not something - * that IB has and should not live in the rvt. + * Don't allow invalid path_mtu values. OK to set greater + * than the active mtu (or even the max_cap, if we have tuned + * that to a small mtu. 
We'll set qp->path_mtu + * to the lesser of requested attribute mtu and active, + * for packetizing messages. + * Note that the QP port has to be set in INIT and MTU in RTR. */ - return -EOPNOTSUPP; + if (attr_mask & IB_QP_PATH_MTU) { + pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr); + if (pmtu < 0) + goto inval; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state == IB_MIG_REARM) { + if (qp->s_mig_state == IB_MIG_ARMED) + goto inval; + if (new_state != IB_QPS_RTS) + goto inval; + } else if (attr->path_mig_state == IB_MIG_MIGRATED) { + if (qp->s_mig_state == IB_MIG_REARM) + goto inval; + if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) + goto inval; + if (qp->s_mig_state == IB_MIG_ARMED) + mig = 1; + } else { + goto inval; + } + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic) + goto inval; + + switch (new_state) { + case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) + rvt_reset_qp(rdi, qp, ibqp->qp_type); + break; + + case IB_QPS_RTR: + /* Allow event to re-trigger if QP set to RTR more than once */ + qp->r_flags &= ~RVT_R_COMM_EST; + qp->state = new_state; + break; + + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + + case IB_QPS_ERR: + lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); + break; + + default: + qp->state = new_state; + break; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + qp->s_pkey_index = attr->pkey_index; + + if (attr_mask & IB_QP_PORT) + qp->port_num = attr->port_num; + + if (attr_mask & IB_QP_DEST_QPN) + qp->remote_qpn = attr->dest_qp_num; + + if (attr_mask & IB_QP_SQ_PSN) { + qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask; + qp->s_psn = qp->s_next_psn; + qp->s_sending_psn = qp->s_next_psn; + qp->s_last_psn = qp->s_next_psn - 1; + qp->s_sending_hpsn = qp->s_last_psn; + } + + if (attr_mask & IB_QP_RQ_PSN) + qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->qp_access_flags = attr->qp_access_flags; + + if (attr_mask & IB_QP_AV) { + qp->remote_ah_attr = attr->ah_attr; + qp->s_srate = attr->ah_attr.static_rate; + qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); + } + + if (attr_mask & IB_QP_ALT_PATH) { + qp->alt_ah_attr = attr->alt_ah_attr; + qp->s_alt_pkey_index = attr->alt_pkey_index; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + qp->s_mig_state = attr->path_mig_state; + if (mig) { + qp->remote_ah_attr = qp->alt_ah_attr; + qp->port_num = qp->alt_ah_attr.port_num; + qp->s_pkey_index = qp->s_alt_pkey_index; + + /* + * Ignored by drivers which do not support it. Not + * really worth creating a call back into the driver + * just to set a flag. 
+ */ + qp->s_flags |= RVT_S_AHG_CLEAR; + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu); + qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); + } + + if (attr_mask & IB_QP_RETRY_CNT) { + qp->s_retry_cnt = attr->retry_cnt; + qp->s_retry = attr->retry_cnt; + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp->s_rnr_retry_cnt = attr->rnr_retry; + qp->s_rnr_retry = attr->rnr_retry; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + qp->r_min_rnr_timer = attr->min_rnr_timer; + + if (attr_mask & IB_QP_TIMEOUT) { + qp->timeout = attr->timeout; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + } + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + rvt_insert_qp(rdi, qp); + + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + if (mig) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_PATH_MIG; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + return 0; + +inval: + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + return -EINVAL; } /** @@ -948,3 +1415,21 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, { return -EOPNOTSUPP; } + +void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} +EXPORT_SYMBOL(rvt_free_qpn); + +void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) +{ + spin_lock(&rdi->n_qps_lock); + rdi->n_qps_allocated--; + spin_unlock(&rdi->n_qps_lock); +} +EXPORT_SYMBOL(rvt_dec_qp_cnt); diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 22e86ff..b269291 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -77,6 +77,37 @@ TRACE_EVENT(rvt_dbg, TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) ); +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_qphash +DECLARE_EVENT_CLASS(rvt_qphash_template, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, bucket) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->bucket = bucket; + ), + TP_printk( + "[%s] qpn 0x%x bucket %u", + __get_str(dev), + __entry->qpn, + __entry->bucket + ) +); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + #endif /* __RDMAVT_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 36e4fb4c..1c7123f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -194,6 +195,10 @@ struct rvt_driver_params { u8 qos_shift; char cq_name[RVT_CQN_MAX]; int node; + int max_rdma_atomic; + int psn_mask; + int psn_shift; + int psn_modify_mask; }; 
/* Protection domain */ @@ -233,6 +238,15 @@ struct rvt_driver_provided { void (*notify_qp_reset)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp); + int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); + void (*flush_qp_waiters)(struct rvt_qp *qp); + void (*stop_send_queue)(struct rvt_qp *qp); + void (*quiesce_qp)(struct rvt_qp *qp); + void (*notify_error_qp)(struct rvt_qp *qp); + u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + u32 pmtu); + int (*mtu_to_path_mtu)(u32 mtu); /*--------------------*/ /* Optional functions */ @@ -340,6 +354,34 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, return rdi->ports[port_index]->pkey_table[index]; } +/** + * rvt_lookup_qpn - return the QP with the given QPN + * @ibp: the ibport + * @qpn: the QP number to look up + * + * The caller must hold the rcu_read_lock(), and keep the lock until + * the returned qp is no longer in use. + */ +/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ +static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, + struct rvt_ibport *rvp, + u32 qpn) __must_hold(RCU) +{ + struct rvt_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + qp = rcu_dereference(rvp->qp[qpn]); + } else { + u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); + + for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) + break; + } + return qp; +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index ea60476..4aa8171 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -127,4 +127,13 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) atomic_inc(&mr->refcount); } +static inline void rvt_put_ss(struct rvt_sge_state *ss) +{ + while (ss->num_sge) { + rvt_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + #endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 3189f19..e66bcc9 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -355,6 +355,7 @@ struct rvt_srq { #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) +#define RVT_QPN_MASK 0xFFFFFF /* * QPN-map pages start out as NULL, they get allocated upon @@ -397,6 +398,25 @@ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, sizeof(struct rvt_sge)) * n); } +/* + * Since struct rvt_rwqe is not a fixed size, we can't simply index into + * struct rvt_rwq.wq. This function does the array index computation. 
+ */ +static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) +{ + return (struct rvt_rwqe *) + ((char *)rq->wq->wq + + (sizeof(struct rvt_rwqe) + + rq->max_sge * sizeof(struct ib_sge)) * n); +} + extern const int ib_rvt_state_ops[]; +struct rvt_dev_info; +void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp); +void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends); +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); +void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn); +void rvt_dec_qp_cnt(struct rvt_dev_info *rdi); + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v0.10.2 From 5a17ad11da322e3040b570e69a14a41a37060e5f Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:42 -0800 Subject: IB/rdmavt: Add destroy qp verb This patch adds in support for the qp destroy verb call. Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 9442126..cca65a6 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1201,13 +1201,30 @@ inval: */ int rvt_destroy_qp(struct ib_qp *ibqp) { - /* - * VT-DRIVER-API: qp_flush() - * Driver provies a mechanism to flush and wait for that flush to - * finish. - */ + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); - return -EOPNOTSUPP; + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + rvt_reset_qp(rdi, qp, ibqp->qp_type); + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + /* qpn is now available for use again */ + rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); + + spin_lock(&rdi->n_qps_lock); + rdi->n_qps_allocated--; + spin_unlock(&rdi->n_qps_lock); + + if (qp->ip) + kref_put(&qp->ip->ref, rvt_release_mmap_info); + else + vfree(qp->r_rq.wq); + vfree(qp->s_wq); + rdi->driver_f.qp_priv_free(rdi, qp); + kfree(qp); + return 0; } int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, -- cgit v0.10.2 From 120bdafaece72056e48d97809c5abe172824a7f6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:48 -0800 Subject: IB/rdmavt: Add post receive to rdmavt This patch adds the simple post receive verbs call to rdmavt. The actual interrupt handling and packet processing is still done in the low level driver. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index cca65a6..0eeef49 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1244,14 +1244,47 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - /* - * When a packet arrives the driver needs to call up to rvt to process - * the packet. The UD, RC, UC processing will be done in rvt, however - * the driver should be able to override this if it so choses. Perhaps a - * set of function pointers set up at registration time. - */ + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_rwq *wq = qp->r_rq.wq; + unsigned long flags; - return -EOPNOTSUPP; + /* Check that state is OK to post receive.
*/ + if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { + *bad_wr = wr; + return -EINVAL; + } + + for (; wr; wr = wr->next) { + struct rvt_rwqe *wqe; + u32 next; + int i; + + if ((unsigned)wr->num_sge > qp->r_rq.max_sge) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->r_rq.lock, flags); + next = wq->head + 1; + if (next >= qp->r_rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + *bad_wr = wr; + return -ENOMEM; + } + + wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + } + return 0; } /** -- cgit v0.10.2 From 4e74080b248701c0c2d1af2764bf02f9c531020a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:55 -0800 Subject: IB/rdmavt: Add multicast functions This patch adds in the multicast add and remove functions as well as the ancillary infrastructure needed. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 5a78dc7..528c1ca 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -45,14 +45,345 @@ * */ +#include +#include +#include +#include +#include + #include "mcast.h" +void rvt_driver_mcast_init(struct rvt_dev_info *rdi) +{ + /* + * Anything that needs setup for multicast on a per driver or per rdi + * basis should be done in here. + */ + spin_lock_init(&rdi->n_mcast_grps_lock); +} + +/** + * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * @qp: the QP to link + */ +static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) +{ + struct rvt_mcast_qp *mqp; + + mqp = kmalloc(sizeof(*mqp), GFP_KERNEL); + if (!mqp) + goto bail; + + mqp->qp = qp; + atomic_inc(&qp->refcount); + +bail: + return mqp; +} + +static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) +{ + struct rvt_qp *qp = mqp->qp; + + /* Notify hfi1_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + kfree(mqp); +} + +/** + * mcast_alloc - allocate the multicast GID structure + * @mgid: the multicast GID + * + * A list of QPs will be attached to this structure. + */ +static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid) +{ + struct rvt_mcast *mcast; + + mcast = kzalloc(sizeof(*mcast), GFP_KERNEL); + if (!mcast) + goto bail; + + mcast->mgid = *mgid; + INIT_LIST_HEAD(&mcast->qp_list); + init_waitqueue_head(&mcast->wait); + atomic_set(&mcast->refcount, 0); + +bail: + return mcast; +} + +static void rvt_mcast_free(struct rvt_mcast *mcast) +{ + struct rvt_mcast_qp *p, *tmp; + + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) + rvt_mcast_qp_free(p); + + kfree(mcast); +} + +/** + * rvt_mcast_find - search the global table for the given multicast GID + * @ibp: the IB port structure + * @mgid: the multicast GID to search for + * + * Returns NULL if not found. + * + * The caller is responsible for decrementing the reference count if found. 
+ */ +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid) +{ + struct rb_node *n; + unsigned long flags; + struct rvt_mcast *found = NULL; + + spin_lock_irqsave(&ibp->lock, flags); + n = ibp->mcast_tree.rb_node; + while (n) { + int ret; + struct rvt_mcast *mcast; + + mcast = rb_entry(n, struct rvt_mcast, rb_node); + + ret = memcmp(mgid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = n->rb_left; + } else if (ret > 0) { + n = n->rb_right; + } else { + atomic_inc(&mcast->refcount); + found = mcast; + break; + } + } + spin_unlock_irqrestore(&ibp->lock, flags); + return found; +} +EXPORT_SYMBOL(rvt_mcast_find); + +/** + * mcast_add - insert mcast GID into table and attach QP struct + * @mcast: the mcast GID table + * @mqp: the QP to attach + * + * Return zero if both were added. Return EEXIST if the GID was already in + * the table but the QP was added. Return ESRCH if the QP was already + * attached and neither structure was added. + */ +static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp, + struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp) +{ + struct rb_node **n = &ibp->mcast_tree.rb_node; + struct rb_node *pn = NULL; + int ret; + + spin_lock_irq(&ibp->lock); + + while (*n) { + struct rvt_mcast *tmcast; + struct rvt_mcast_qp *p; + + pn = *n; + tmcast = rb_entry(pn, struct rvt_mcast, rb_node); + + ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = &pn->rb_left; + continue; + } + if (ret > 0) { + n = &pn->rb_right; + continue; + } + + /* Search the QP list to see if this is already there. */ + list_for_each_entry_rcu(p, &tmcast->qp_list, list) { + if (p->qp == mqp->qp) { + ret = ESRCH; + goto bail; + } + } + if (tmcast->n_attached == + rdi->dparms.props.max_mcast_qp_attach) { + ret = ENOMEM; + goto bail; + } + + tmcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &tmcast->qp_list); + ret = EEXIST; + goto bail; + } + + spin_lock(&rdi->n_mcast_grps_lock); + if (rdi->n_mcast_grps_allocated == rdi->dparms.props.max_mcast_grp) { + spin_unlock(&rdi->n_mcast_grps_lock); + ret = ENOMEM; + goto bail; + } + + rdi->n_mcast_grps_allocated++; + spin_unlock(&rdi->n_mcast_grps_lock); + + mcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &mcast->qp_list); + + atomic_inc(&mcast->refcount); + rb_link_node(&mcast->rb_node, pn, n); + rb_insert_color(&mcast->rb_node, &ibp->mcast_tree); + + ret = 0; + +bail: + spin_unlock_irq(&ibp->lock); + + return ret; +} + int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return -EOPNOTSUPP; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1]; + struct rvt_mcast *mcast; + struct rvt_mcast_qp *mqp; + int ret = -ENOMEM; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; + + /* + * Allocate data structures since its better to do this outside of + * spin locks and it will most likely be needed. + */ + mcast = rvt_mcast_alloc(gid); + if (!mcast) + return -ENOMEM; + + mqp = rvt_mcast_qp_alloc(qp); + if (!mqp) + goto bail_mcast; + + switch (rvt_mcast_add(rdi, ibp, mcast, mqp)) { + case ESRCH: + /* Neither was used: OK to attach the same QP twice. */ + ret = 0; + goto bail_mqp; + case EEXIST: /* The mcast wasn't used */ + ret = 0; + goto bail_mcast; + case ENOMEM: + /* Exceeded the maximum number of mcast groups. 
*/ + ret = -ENOMEM; + goto bail_mqp; + default: + break; + } + + return 0; + +bail_mqp: + rvt_mcast_qp_free(mqp); + +bail_mcast: + rvt_mcast_free(mcast); + + return ret; } int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return -EOPNOTSUPP; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1]; + struct rvt_mcast *mcast = NULL; + struct rvt_mcast_qp *p, *tmp, *delp = NULL; + struct rb_node *n; + int last = 0; + int ret = 0; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; + + spin_lock_irq(&ibp->lock); + + /* Find the GID in the mcast table. */ + n = ibp->mcast_tree.rb_node; + while (1) { + if (!n) { + spin_unlock_irq(&ibp->lock); + return -EINVAL; + } + + mcast = rb_entry(n, struct rvt_mcast, rb_node); + ret = memcmp(gid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + break; + } + + /* Search the QP list. */ + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { + if (p->qp != qp) + continue; + /* + * We found it, so remove it, but don't poison the forward + * link until we are sure there are no list walkers. + */ + list_del_rcu(&p->list); + mcast->n_attached--; + delp = p; + + /* If this was the last attached QP, remove the GID too. */ + if (list_empty(&mcast->qp_list)) { + rb_erase(&mcast->rb_node, &ibp->mcast_tree); + last = 1; + } + break; + } + + spin_unlock_irq(&ibp->lock); + /* QP not attached */ + if (!delp) + return -EINVAL; + + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + rvt_mcast_qp_free(delp); + + if (last) { + atomic_dec(&mcast->refcount); + wait_event(mcast->wait, !atomic_read(&mcast->refcount)); + rvt_mcast_free(mcast); + spin_lock_irq(&rdi->n_mcast_grps_lock); + rdi->n_mcast_grps_allocated--; + spin_unlock_irq(&rdi->n_mcast_grps_lock); + } + + return 0; +} + +int rvt_mcast_tree_empty(struct rvt_dev_info *rdi) +{ + int i; + int in_use = 0; + + for (i = 0; i < rdi->dparms.nports; i++) + if (rdi->ports[i]->mcast_tree.rb_node) + in_use++; + return in_use; } diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h index 21647c3..cd15a98 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.h +++ b/drivers/infiniband/sw/rdmavt/mcast.h @@ -50,7 +50,9 @@ #include +void rvt_driver_mcast_init(struct rvt_dev_info *rdi); int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); +int rvt_mcast_tree_empty(struct rvt_dev_info *rdi); #endif /* DEF_RVTMCAST_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0eeef49..64b9c01 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -240,6 +240,8 @@ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) if (rdi->driver_f.free_all_qps) qp_inuse = rdi->driver_f.free_all_qps(rdi); + qp_inuse += rvt_mcast_tree_empty(rdi); + if (!rdi->qp_dev) return qp_inuse; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7f56a42..5a094eb 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -305,6 +305,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, query_srq); /* Multicast */ + rvt_driver_mcast_init(rdi); CHECK_DRIVER_OVERRIDE(rdi, 
attach_mcast); CHECK_DRIVER_OVERRIDE(rdi, detach_mcast); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1c7123f..04e9019 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -306,6 +306,11 @@ struct rvt_dev_info { struct kthread_worker *worker; /* per device cq worker */ u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; /* protect count of in use cqs */ + + /* Multicast */ + u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -399,8 +404,11 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, void *obj); void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); +int rvt_reg_mr(struct rvt_qp *qp, struct ib_reg_wr *wr); +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); /* Temporary export */ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type); + #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e66bcc9..a97b95b 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -50,6 +50,7 @@ #include #include +#include /* * Atomic bit definitions for r_aflags. */ @@ -386,8 +387,27 @@ struct rvt_qp_ibdev { }; /* + * There is one struct rvt_mcast for each multicast GID. + * All attached QPs are then stored as a list of + * struct rvt_mcast_qp. + */ +struct rvt_mcast_qp { + struct list_head list; + struct rvt_qp *qp; +}; + +struct rvt_mcast { + struct rb_node rb_node; + union ib_gid mgid; + struct list_head qp_list; + wait_queue_head_t wait; + atomic_t refcount; + int n_attached; +}; + +/* * Since struct rvt_swqe is not a fixed size, we can't simply index into - * struct hfi1_qp.s_wq. This function does the array index computation. + * struct rvt_qp.s_wq. This function does the array index computation. */ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, unsigned n) -- cgit v0.10.2 From 182285d0fcaba7b284b4feb71ebad5e7aaea0f4b Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:01:01 -0800 Subject: IB/rdmavt: Add misc dev register functionality There are a number of minor things that should be set by rdmavt rather than by the drivers. Now that rdmavt has solidified in its design we can go ahead and clean up this stuff. Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 5a094eb..cf7cac6 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -50,6 +50,8 @@ #include "vt.h" #include "trace.h" +#define RVT_UVERBS_ABI_VERSION 2 + MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("RDMA Verbs Transport Library"); @@ -348,6 +350,47 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; + /* + * There are some things which could be set by underlying drivers but + * really should be up to rdmavt to set. For instance drivers can't know + * exactly which functions rdmavt supports, nor do they know the ABI + * version, so we do all of this sort of stuff here. 
+ */ + rdi->ibdev.uverbs_abi_ver = RVT_UVERBS_ABI_VERSION; + rdi->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | + (1ull << IB_USER_VERBS_CMD_QUERY_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + rdi->ibdev.node_type = RDMA_NODE_IB_CA; + rdi->ibdev.num_comp_vectors = 1; + /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { -- cgit v0.10.2 From 2b047ea7a3ceef0322e666782e0a82e98424f6f1 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 22 Jan 2016 13:04:32 -0800 Subject: IB/rdmavt: Remove unused variable from Queue Pair s_sde should be in the low level driver QP private data. Remove the definition from rvt_qp. Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index a97b95b..f0e2426 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -304,7 +304,6 @@ struct rvt_qp { struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; - struct sdma_engine *s_sde; /* current sde */ u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ -- cgit v0.10.2 From e85ec33d820e1f3f763a46f9fd41230ca0ce40c6 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 22 Jan 2016 13:04:38 -0800 Subject: IB/rdmavt: add modify queue pair driver helpers Low level drivers need to be able to check incoming attributes as well as be able to adjust their private data on queue pair modification. Add 2 driver callbacks, check_modify_qp and modify_qp, to facilitate this. 
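As a rough illustration (not part of this patch), a driver-side implementation of the two callbacks could look like the sketch below; rvt_modify_qp() calls check_modify_qp() before it starts applying the attributes, so the driver can veto the transition, and calls modify_qp() once the attributes have been committed, as the qp.c hunks that follow show. The my_qp_priv structure, the 2K MTU limit, and the use of qp->priv for the driver's private data are assumptions made for the example only.

    /* Hypothetical driver-private per-QP state. */
    struct my_qp_priv {
            u32 hw_mtu;     /* MTU value the hardware was programmed with */
    };

    /* Return 0 if the requested modification is acceptable, -errno otherwise. */
    static int my_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                                  int attr_mask, struct ib_udata *udata)
    {
            /* Example constraint: this (imaginary) hardware tops out at 2K MTU. */
            if ((attr_mask & IB_QP_PATH_MTU) && attr->path_mtu > IB_MTU_2048)
                    return -EINVAL;
            return 0;
    }

    /* Called after rdmavt has committed the attribute changes. */
    static void my_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                             int attr_mask, struct ib_udata *udata)
    {
            struct my_qp_priv *priv = qp->priv;

            /* Mirror the now-committed path MTU into driver state. */
            if (attr_mask & IB_QP_PATH_MTU)
                    priv->hw_mtu = qp->pmtu;
    }

    /* Wired up in the driver's rvt_driver_provided table at registration: */
    rdi->driver_f.check_modify_qp = my_check_modify_qp;
    rdi->driver_f.modify_qp = my_modify_qp;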
Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 64b9c01..615358e 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -970,6 +970,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr_mask, link)) goto inval; + if (rdi->driver_f.check_modify_qp && + rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata)) + goto inval; + if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; @@ -1166,6 +1170,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) qp->s_max_rd_atomic = attr->max_rd_atomic; + if (rdi->driver_f.modify_qp) + rdi->driver_f.modify_qp(qp, attr, attr_mask, udata); + spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 04e9019..e382cca 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -256,6 +256,13 @@ struct rvt_driver_provided { struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port, gfp_t gfp); + /** + * Return 0 if modification is valid, -errno otherwise + */ + int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); }; struct rvt_dev_info { -- cgit v0.10.2 From ff6acd69518e0a84bd9c9b7f1bd4313f7076db97 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:45 -0800 Subject: IB/rdmavt: Add device structure allocation This patch adds rdmavt device structure allocation to rdmavt. The ib_device alloc is now done in rdmavt instead of the driver. Drivers need to tell rdmavt the number of ports when calling. A side effect of this patch is fixing a bug with port initialization where the device structure port array was allocated over top of an existing one.
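A minimal usage sketch, assuming a made-up my_devdata structure and my_alloc_devdata() wrapper (the real conversion for qib appears in a later patch in this series): the driver embeds struct rvt_dev_info at the start of its per-device structure and lets rdmavt perform the ib_alloc_device() and size the port array from the nports argument.

    /* Hypothetical driver device structure; rvt_dev_info is placed first so
     * the pointer returned by rvt_alloc_device() can be cast directly. */
    struct my_devdata {
            struct rvt_dev_info rdi;
            int num_pports;
            /* ... driver private state ... */
    };

    static struct my_devdata *my_alloc_devdata(int nports)
    {
            struct my_devdata *dd;

            /* rdmavt allocates the ib_device and the rdi->ports array. */
            dd = (struct my_devdata *)rvt_alloc_device(sizeof(*dd), nports);
            if (!dd)
                    return NULL;

            dd->num_pports = nports;
            return dd;
    }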
Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index cf7cac6..450caa7 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -67,6 +67,24 @@ static void rvt_cleanup(void) } module_exit(rvt_cleanup); +struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) +{ + struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM); + + rdi = (struct rvt_dev_info *)ib_alloc_device(size); + if (!rdi) + return rdi; + + rdi->ports = kcalloc(nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) + ib_dealloc_device(&rdi->ibdev); + + return rdi; +} +EXPORT_SYMBOL(rvt_alloc_device); + static int rvt_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -434,18 +452,6 @@ EXPORT_SYMBOL(rvt_unregister_device); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int portnum, u16 *pkey_table) { - if (!rdi->dparms.nports) { - rvt_pr_err(rdi, "Driver says it has no ports.\n"); - return -EINVAL; - } - - rdi->ports = kcalloc(rdi->dparms.nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); - if (!rdi->ports) { - rvt_pr_err(rdi, "Could not allocate port mem.\n"); - return -ENOMEM; - } rdi->ports[portnum] = port; rdi->ports[portnum]->pkey_table = pkey_table; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e382cca..7768e04 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -394,6 +394,7 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, return qp; } +struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -- cgit v0.10.2 From 3711baf27d78475436b063f33399908ba208a8f2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:51 -0800 Subject: IB/rdmavt: Add mad agents to rdmavt This patch adds mad agent create and free to rdmavt. 
Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index eef7029..e01f3fb 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -45,6 +45,7 @@ * */ +#include #include "mad.h" /** @@ -83,3 +84,74 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ return IB_MAD_RESULT_FAILURE; } + +static void rvt_send_mad_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + ib_free_send_mad(mad_send_wc->send_buf); +} + +int rvt_create_mad_agents(struct rvt_dev_info *rdi) +{ + struct ib_mad_agent *agent; + struct rvt_ibport *rvp; + int p; + int ret; + + for (p = 0; p < rdi->dparms.nports; p++) { + rvp = rdi->ports[p]; + agent = ib_register_mad_agent(&rdi->ibdev, p + 1, + IB_QPT_SMI, + NULL, 0, rvt_send_mad_handler, + NULL, NULL, 0); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + + rvp->send_agent = agent; + + if (rdi->driver_f.notify_create_mad_agent) + rdi->driver_f.notify_create_mad_agent(rdi, p); + } + + return 0; + +err: + for (p = 0; p < rdi->dparms.nports; p++) { + rvp = rdi->ports[p]; + if (rvp->send_agent) { + agent = rvp->send_agent; + rvp->send_agent = NULL; + ib_unregister_mad_agent(agent); + if (rdi->driver_f.notify_free_mad_agent) + rdi->driver_f.notify_free_mad_agent(rdi, p); + } + } + + return ret; +} + +void rvt_free_mad_agents(struct rvt_dev_info *rdi) +{ + struct ib_mad_agent *agent; + struct rvt_ibport *rvp; + int p; + + for (p = 0; p < rdi->dparms.nports; p++) { + rvp = rdi->ports[p]; + if (rvp->send_agent) { + agent = rvp->send_agent; + rvp->send_agent = NULL; + ib_unregister_mad_agent(agent); + } + if (rvp->sm_ah) { + ib_destroy_ah(&rvp->sm_ah->ibah); + rvp->sm_ah = NULL; + } + + if (rdi->driver_f.notify_free_mad_agent) + rdi->driver_f.notify_free_mad_agent(rdi, p); + } +} + diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index ee740e9..5d8a6a9 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -55,5 +55,6 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); - +int rvt_create_mad_agents(struct rvt_dev_info *rdi); +void rvt_free_mad_agents(struct rvt_dev_info *rdi); #endif /* DEF_RVTMAD_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 450caa7..7496d43 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -416,6 +416,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) goto bail_cq; } + rvt_create_mad_agents(rdi); + rvt_pr_info(rdi, "Registration with rdmavt done.\n"); return ret; @@ -438,6 +440,8 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) if (!rdi) return; + rvt_free_mad_agents(rdi); + ib_unregister_device(&rdi->ibdev); rvt_cq_exit(rdi); rvt_mr_exit(rdi); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 54ee05a..d9f78cc 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -59,6 +59,7 @@ #include "mcast.h" #include "mmap.h" #include "cq.h" +#include "mad.h" #define rvt_pr_info(rdi, fmt, ...) 
\ __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 7768e04..31f9e5a 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -136,7 +136,8 @@ struct rvt_ibport { */ u16 *pkey_table; - /* TODO: Move sm_ah and smi_ah into here as well*/ + struct rvt_ah *sm_ah; + struct rvt_ah *smi_ah; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ @@ -263,6 +264,9 @@ struct rvt_driver_provided { int attr_mask, struct ib_udata *udata); void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + + void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); }; struct rvt_dev_info { -- cgit v0.10.2 From fe31419501ba133a967da7b7da0d32945ef21840 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:58 -0800 Subject: IB/rdmavt: Fix copyright date Update all files added by rdmavt which do not yet have 2016 as the copyright year. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index c194d9d..9372c43 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 8cd7ea7..e9c36be 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -2,7 +2,7 @@ #define DEF_RVTAH_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 7308a27..055aa71 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 3813d90..6182c29 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -2,7 +2,7 @@ #define DEF_RVTCQ_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index c070141..33076a5 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h index a80cc35..979f07e 100644 --- a/drivers/infiniband/sw/rdmavt/dma.h +++ b/drivers/infiniband/sw/rdmavt/dma.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTDMA_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index e01f3fb..5c720d35 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index 5d8a6a9..c89faf4 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -2,7 +2,7 @@ #define DEF_RVTMAD_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 528c1ca..e06a875 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h index cd15a98..29f5792 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.h +++ b/drivers/infiniband/sw/rdmavt/mcast.h @@ -2,7 +2,7 @@ #define DEF_RVTMCAST_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index fc30ff7..d6330d7 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index 3513e25..e806747 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTMMAP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index f1dcaf4..ee36be3 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index c5339aa..6938051 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -2,7 +2,7 @@ #define DEF_RVTMR_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index f8dba88..62fee44 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 56d75e6..1892ca4 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTPD_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 615358e..8d3563a 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index f438809..8409f80 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -2,7 +2,7 @@ #define DEF_RVTQP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index bbb623a..c9eb8b3 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 0c3c5a7..9f07880 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -2,7 +2,7 @@ #define DEF_RVTSRQ_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c index 19afe39..d593285 100644 --- a/drivers/infiniband/sw/rdmavt/trace.c +++ b/drivers/infiniband/sw/rdmavt/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index b269291..d5b1281 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7496d43..571463e 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index d9f78cc..a5c36d3 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 31f9e5a..f6569b2 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -2,7 +2,7 @@ #define DEF_RDMA_VT_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index 4aa8171..5edffdc 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCMR_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f0e2426..91f20fd 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. -- cgit v0.10.2 From 74d2d50067c09c2e9686ef742c1ae08f9c8c3ddf Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 13:05:04 -0800 Subject: IB/rdmavt: Add support for rvt_query_qp Drivers using rdmavt can rely on rvt_query_qp instead of defining their own query_qp functions. 
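Illustrative only (my_dump_qp_state() is a made-up helper and the qp pointer is assumed to come from a prior ib_create_qp() on a rdmavt backed device): with rvt_query_qp() in place, a kernel consumer retrieves QP state and capabilities through the ordinary verbs call and rdmavt fills in the attributes from the rvt_qp fields shown in the hunk below.

    static void my_dump_qp_state(struct ib_qp *qp)
    {
            struct ib_qp_attr attr;
            struct ib_qp_init_attr init_attr;

            if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PATH_MTU, &init_attr))
                    pr_info("qp %u state %d mtu %d max_send_wr %u\n",
                            qp->qp_num, attr.qp_state, attr.path_mtu,
                            attr.cap.max_send_wr);
    }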
Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 8d3563a..354fdac 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1239,7 +1239,52 @@ int rvt_destroy_qp(struct ib_qp *ibqp) int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { - return -EOPNOTSUPP; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + + attr->qp_state = qp->state; + attr->cur_qp_state = attr->qp_state; + attr->path_mtu = qp->path_mtu; + attr->path_mig_state = qp->s_mig_state; + attr->qkey = qp->qkey; + attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask; + attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask; + attr->dest_qp_num = qp->remote_qpn; + attr->qp_access_flags = qp->qp_access_flags; + attr->cap.max_send_wr = qp->s_size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; + attr->cap.max_send_sge = qp->s_max_sge; + attr->cap.max_recv_sge = qp->r_rq.max_sge; + attr->cap.max_inline_data = 0; + attr->ah_attr = qp->remote_ah_attr; + attr->alt_ah_attr = qp->alt_ah_attr; + attr->pkey_index = qp->s_pkey_index; + attr->alt_pkey_index = qp->s_alt_pkey_index; + attr->en_sqd_async_notify = 0; + attr->sq_draining = qp->s_draining; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; + attr->min_rnr_timer = qp->r_min_rnr_timer; + attr->port_num = qp->port_num; + attr->timeout = qp->timeout; + attr->retry_cnt = qp->s_retry_cnt; + attr->rnr_retry = qp->s_rnr_retry_cnt; + attr->alt_port_num = qp->alt_ah_attr.port_num; + attr->alt_timeout = qp->alt_timeout; + + init_attr->event_handler = qp->ibqp.event_handler; + init_attr->qp_context = qp->ibqp.qp_context; + init_attr->send_cq = qp->ibqp.send_cq; + init_attr->recv_cq = qp->ibqp.recv_cq; + init_attr->srq = qp->ibqp.srq; + init_attr->cap = attr->cap; + if (qp->s_flags & RVT_S_SIGNAL_REQ_WR) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->qp_type = qp->ibqp.qp_type; + init_attr->port_num = qp->port_num; + return 0; } /** -- cgit v0.10.2 From 5df1673f1de2b6dad614c929ef47ccebba3bd970 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:07:23 -0800 Subject: IB/qib: Use rdmavt device allocation function No longer do drivers need to call into the IB core to allocate the verbs device. Use the functionality provided by rdmavt. 
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 5087a1f..a3c74bb 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1131,9 +1131,12 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) { unsigned long flags; struct qib_devdata *dd; - int ret; + int ret, nports; - dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); + /* extra is * number of ports */ + nports = extra / sizeof(struct qib_pportdata); + dd = (struct qib_devdata *)rvt_alloc_device(sizeof(*dd) + extra, + nports); if (!dd) return ERR_PTR(-ENOMEM); -- cgit v0.10.2 From 5196aa96e18a7b3ccbf5ec4705fe7981aee03771 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:07:30 -0800 Subject: IB/qib: Remove create and free mad agents Get rid of create and free mad agent from the driver and use rdmavt version. Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ca28c19..a159922 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2910,8 +2910,8 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data); } - if (dd->pport[i].ibport_data.smi_ah) - ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah); + if (dd->pport[i].ibport_data.rvp.smi_ah) + ib_destroy_ah(&dd->pport[i].ibport_data.rvp.smi_ah->ibah); } } @@ -5507,7 +5507,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd) if (IS_ERR(send_buf)) goto retry; - if (!ibp->smi_ah) { + if (!ibp->rvp.smi_ah) { struct ib_ah *ah; ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); @@ -5515,11 +5515,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd) ret = PTR_ERR(ah); else { send_buf->ah = ah; - ibp->smi_ah = ibah_to_rvtah(ah); + ibp->rvp.smi_ah = ibah_to_rvtah(ah); ret = 0; } } else { - send_buf->ah = &ibp->smi_ah->ibah; + send_buf->ah = &ibp->rvp.smi_ah->ibah; ret = 0; } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 3e8dde2..1273537 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -101,7 +101,7 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) memcpy(smp->data, data, len); spin_lock_irqsave(&ibp->rvp.lock, flags); - if (!ibp->sm_ah) { + if (!ibp->rvp.sm_ah) { if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; @@ -110,13 +110,13 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) ret = PTR_ERR(ah); else { send_buf->ah = ah; - ibp->sm_ah = ibah_to_rvtah(ah); + ibp->rvp.sm_ah = ibah_to_rvtah(ah); ret = 0; } } else ret = -EINVAL; } else { - send_buf->ah = &ibp->sm_ah->ibah; + send_buf->ah = &ibp->rvp.sm_ah->ibah; ret = 0; } spin_unlock_irqrestore(&ibp->rvp.lock, flags); @@ -712,11 +712,11 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, smp->status |= IB_SMP_INVALID_FIELD; else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) { spin_lock_irqsave(&ibp->rvp.lock, flags); - if (ibp->sm_ah) { + if (ibp->rvp.sm_ah) { if (smlid != ibp->rvp.sm_lid) - ibp->sm_ah->attr.dlid = smlid; + ibp->rvp.sm_ah->attr.dlid = smlid; if (msl != 
ibp->rvp.sm_sl) - ibp->sm_ah->attr.sl = msl; + ibp->rvp.sm_ah->attr.sl = msl; } spin_unlock_irqrestore(&ibp->rvp.lock, flags); if (smlid != ibp->rvp.sm_lid) @@ -2445,12 +2445,6 @@ bail: return ret; } -static void send_handler(struct ib_mad_agent *agent, - struct ib_mad_send_wc *mad_send_wc) -{ - ib_free_send_mad(mad_send_wc->send_buf); -} - static void xmit_wait_timer_func(unsigned long opaque) { struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; @@ -2475,71 +2469,28 @@ done: mod_timer(&ppd->cong_stats.timer, jiffies + HZ); } -int qib_create_agents(struct qib_ibdev *dev) +void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx) { - struct qib_devdata *dd = dd_from_dev(dev); - struct ib_mad_agent *agent; - struct qib_ibport *ibp; - int p; - int ret; + struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = container_of(ibdev, + struct qib_devdata, verbs_dev); - for (p = 0; p < dd->num_pports; p++) { - ibp = &dd->pport[p].ibport_data; - agent = ib_register_mad_agent(&dev->rdi.ibdev, p + 1, - IB_QPT_SMI, - NULL, 0, send_handler, - NULL, NULL, 0); - if (IS_ERR(agent)) { - ret = PTR_ERR(agent); - goto err; - } - - /* Initialize xmit_wait structure */ - dd->pport[p].cong_stats.counter = 0; - init_timer(&dd->pport[p].cong_stats.timer); - dd->pport[p].cong_stats.timer.function = xmit_wait_timer_func; - dd->pport[p].cong_stats.timer.data = - (unsigned long)(&dd->pport[p]); - dd->pport[p].cong_stats.timer.expires = 0; - add_timer(&dd->pport[p].cong_stats.timer); - - ibp->rvp.send_agent = agent; - } - - return 0; - -err: - for (p = 0; p < dd->num_pports; p++) { - ibp = &dd->pport[p].ibport_data; - if (ibp->rvp.send_agent) { - agent = ibp->rvp.send_agent; - ibp->rvp.send_agent = NULL; - ib_unregister_mad_agent(agent); - } - } - - return ret; + /* Initialize xmit_wait structure */ + dd->pport[port_idx].cong_stats.counter = 0; + init_timer(&dd->pport[port_idx].cong_stats.timer); + dd->pport[port_idx].cong_stats.timer.function = xmit_wait_timer_func; + dd->pport[port_idx].cong_stats.timer.data = + (unsigned long)(&dd->pport[port_idx]); + dd->pport[port_idx].cong_stats.timer.expires = 0; + add_timer(&dd->pport[port_idx].cong_stats.timer); } -void qib_free_agents(struct qib_ibdev *dev) +void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) { - struct qib_devdata *dd = dd_from_dev(dev); - struct ib_mad_agent *agent; - struct qib_ibport *ibp; - int p; - - for (p = 0; p < dd->num_pports; p++) { - ibp = &dd->pport[p].ibport_data; - if (ibp->rvp.send_agent) { - agent = ibp->rvp.send_agent; - ibp->rvp.send_agent = NULL; - ib_unregister_mad_agent(agent); - } - if (ibp->sm_ah) { - ib_destroy_ah(&ibp->sm_ah->ibah); - ibp->sm_ah = NULL; - } - if (dd->pport[p].cong_stats.timer.data) - del_timer_sync(&dd->pport[p].cong_stats.timer); - } + struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = container_of(ibdev, + struct qib_devdata, verbs_dev); + + if (dd->pport[port_idx].cong_stats.timer.data) + del_timer_sync(&dd->pport[port_idx].cong_stats.timer); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 6b85153..a181502 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -2084,23 +2084,16 @@ int qib_register_ib_device(struct qib_devdata *dd) ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) - goto err_reg; - - ret = qib_create_agents(dev); - if (ret) - goto err_agents; + goto err_tx; ret = 
qib_verbs_register_sysfs(dd); if (ret) goto err_class; - goto bail; + return ret; err_class: - qib_free_agents(dev); -err_agents: rvt_unregister_device(&dd->verbs_dev.rdi); -err_reg: err_tx: while (!list_empty(&dev->txreq_free)) { struct list_head *l = dev->txreq_free.next; @@ -2117,7 +2110,6 @@ err_tx: dev->pio_hdrs, dev->pio_hdrs_phys); err_hdrs: qib_dev_err(dd, "cannot register verbs: %d!\n", -ret); -bail: return ret; } @@ -2127,8 +2119,6 @@ void qib_unregister_ib_device(struct qib_devdata *dd) qib_verbs_unregister_sysfs(dd); - qib_free_agents(dev); - rvt_unregister_device(&dd->verbs_dev.rdi); if (!list_empty(&dev->piowait)) diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index bcc6271..3383d56 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -307,8 +307,6 @@ struct qib_pma_counters { struct qib_ibport { struct rvt_ibport rvp; - struct rvt_ah *sm_ah; - struct rvt_ah *smi_ah; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ struct qib_pma_counters __percpu *pmastats; u64 z_unicast_xmit; /* starting count for PMA */ @@ -433,8 +431,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); -int qib_create_agents(struct qib_ibdev *dev); -void qib_free_agents(struct qib_ibdev *dev); +void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx); +void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx); /* * Compare the lower 24 bits of the two values. -- cgit v0.10.2 From 4bb88e5f84326ff6343bc64a33040850f45b44d8 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 13:07:36 -0800 Subject: IB/qib: Remove completion queue data structures and functions from qib Use the completion queue functionality provided by rdmavt. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index 45db4fc..d78f688 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o -ib_qib-y := qib_cq.o qib_diag.o qib_driver.o qib_eeprom.o \ +ib_qib-y := qib_diag.o qib_driver.o qib_eeprom.o \ qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \ qib_mad.o qib_pcie.o qib_pio_copy.o \ qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 29cbe67..ccadece 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1097,8 +1097,6 @@ struct qib_devdata { u16 psxmitwait_check_rate; /* high volume overflow errors defered to tasklet */ struct tasklet_struct error_tasklet; - /* per device cq worker */ - struct kthread_worker *worker; int assigned_node_id; /* NUMA node closest to HCA */ }; diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c deleted file mode 100644 index 094f694..0000000 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ /dev/null @@ -1,545 +0,0 @@ -/* - * Copyright (c) 2013 Intel Corporation. All rights reserved. - * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include - -#include "qib_verbs.h" -#include "qib.h" - -/** - * qib_cq_enter - add a new entry to the completion queue - * @cq: completion queue - * @entry: work completion entry to add - * @sig: true if @entry is a solicitated entry - * - * This may be called with qp->s_lock held. - */ -void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) -{ - struct qib_cq_wc *wc; - unsigned long flags; - u32 head; - u32 next; - - spin_lock_irqsave(&cq->lock, flags); - - /* - * Note that the head pointer might be writable by user processes. - * Take care to verify it is a sane value. - */ - wc = cq->queue; - head = wc->head; - if (head >= (unsigned) cq->ibcq.cqe) { - head = cq->ibcq.cqe; - next = 0; - } else - next = head + 1; - if (unlikely(next == wc->tail)) { - spin_unlock_irqrestore(&cq->lock, flags); - if (cq->ibcq.event_handler) { - struct ib_event ev; - - ev.device = cq->ibcq.device; - ev.element.cq = &cq->ibcq; - ev.event = IB_EVENT_CQ_ERR; - cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); - } - return; - } - if (cq->ip) { - wc->uqueue[head].wr_id = entry->wr_id; - wc->uqueue[head].status = entry->status; - wc->uqueue[head].opcode = entry->opcode; - wc->uqueue[head].vendor_err = entry->vendor_err; - wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].ex.imm_data = - (__u32 __force)entry->ex.imm_data; - wc->uqueue[head].qp_num = entry->qp->qp_num; - wc->uqueue[head].src_qp = entry->src_qp; - wc->uqueue[head].wc_flags = entry->wc_flags; - wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = entry->slid; - wc->uqueue[head].sl = entry->sl; - wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; - wc->uqueue[head].port_num = entry->port_num; - /* Make sure entry is written before the head index. */ - smp_wmb(); - } else - wc->kqueue[head] = *entry; - wc->head = next; - - if (cq->notify == IB_CQ_NEXT_COMP || - (cq->notify == IB_CQ_SOLICITED && - (solicited || entry->status != IB_WC_SUCCESS))) { - struct kthread_worker *worker; - /* - * This will cause send_complete() to be called in - * another thread. 
- */ - smp_rmb(); - worker = cq->dd->worker; - if (likely(worker)) { - cq->notify = IB_CQ_NONE; - cq->triggered++; - queue_kthread_work(worker, &cq->comptask); - } - } - - spin_unlock_irqrestore(&cq->lock, flags); -} - -/** - * qib_poll_cq - poll for work completion entries - * @ibcq: the completion queue to poll - * @num_entries: the maximum number of entries to return - * @entry: pointer to array where work completions are placed - * - * Returns the number of completion entries polled. - * - * This may be called from interrupt context. Also called by ib_poll_cq() - * in the generic verbs code. - */ -int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) -{ - struct qib_cq *cq = to_icq(ibcq); - struct qib_cq_wc *wc; - unsigned long flags; - int npolled; - u32 tail; - - /* The kernel can only poll a kernel completion queue */ - if (cq->ip) { - npolled = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&cq->lock, flags); - - wc = cq->queue; - tail = wc->tail; - if (tail > (u32) cq->ibcq.cqe) - tail = (u32) cq->ibcq.cqe; - for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (tail == wc->head) - break; - /* The kernel doesn't need a RMB since it has the lock. */ - *entry = wc->kqueue[tail]; - if (tail >= cq->ibcq.cqe) - tail = 0; - else - tail++; - } - wc->tail = tail; - - spin_unlock_irqrestore(&cq->lock, flags); - -bail: - return npolled; -} - -static void send_complete(struct kthread_work *work) -{ - struct qib_cq *cq = container_of(work, struct qib_cq, comptask); - - /* - * The completion handler will most likely rearm the notification - * and poll for all pending entries. If a new completion entry - * is added while we are in this routine, queue_work() - * won't call us again until we return so we check triggered to - * see if we need to call the handler again. - */ - for (;;) { - u8 triggered = cq->triggered; - - /* - * IPoIB connected mode assumes the callback is from a - * soft IRQ. We simulate this by blocking "bottom halves". - * See the implementation for ipoib_cm_handle_tx_wc(), - * netif_tx_lock_bh() and netif_tx_lock(). - */ - local_bh_disable(); - cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); - local_bh_enable(); - - if (cq->triggered == triggered) - return; - } -} - -/** - * qib_create_cq - create a completion queue - * @ibdev: the device this completion queue is attached to - * @attr: creation attributes - * @context: unused by the QLogic_IB driver - * @udata: user data for libibverbs.so - * - * Returns a pointer to the completion queue or negative errno values - * for failure. - * - * Called by ib_create_cq() in the generic verbs code. - */ -struct ib_cq *qib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - int entries = attr->cqe; - struct qib_ibdev *dev = to_idev(ibdev); - struct qib_cq *cq; - struct qib_cq_wc *wc; - struct ib_cq *ret; - u32 sz; - - if (attr->flags) - return ERR_PTR(-EINVAL); - - if (entries < 1 || entries > ib_qib_max_cqes) { - ret = ERR_PTR(-EINVAL); - goto done; - } - - /* Allocate the completion queue structure. */ - cq = kmalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - - /* - * Allocate the completion queue entries and head/tail pointers. - * This is allocated separately so that it can be resized and - * also mapped into user space. - * We need to use vmalloc() in order to support mmap and large - * numbers of entries. 
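The allocation that follows sizes a single buffer holding the head/tail header plus cqe + 1 entries, choosing the entry layout by consumer: ib_uverbs_wc when the queue is mmap'ed into user space, ib_wc when only the kernel polls it. A minimal sketch of that sizing arithmetic is below; cq_hdr, uwc and kwc are illustrative stand-ins, not the real qib or rdmavt structures.

#include <stddef.h>

struct uwc { unsigned long wr_id; unsigned int status; };	/* stand-in for ib_uverbs_wc */
struct kwc { unsigned long wr_id; int status; void *qp; };	/* stand-in for ib_wc */

struct cq_hdr { unsigned int head, tail; };

/* One slot is always left unused to tell full from empty, hence entries + 1. */
static size_t cq_alloc_size(int entries, int user_mapped)
{
	size_t sz = sizeof(struct cq_hdr);

	if (user_mapped)
		sz += sizeof(struct uwc) * ((size_t)entries + 1);
	else
		sz += sizeof(struct kwc) * ((size_t)entries + 1);
	return sz;
}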
- */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (entries + 1); - else - sz += sizeof(struct ib_wc) * (entries + 1); - wc = vmalloc_user(sz); - if (!wc) { - ret = ERR_PTR(-ENOMEM); - goto bail_cq; - } - - /* - * Return the address of the WC as the offset to mmap. - * See qib_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - int err; - - cq->ip = rvt_create_mmap_info(&dev->rdi, sz, context, wc); - if (!cq->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wc; - } - - err = ib_copy_to_udata(udata, &cq->ip->offset, - sizeof(cq->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else - cq->ip = NULL; - - spin_lock(&dev->n_cqs_lock); - if (dev->n_cqs_allocated == ib_qib_max_cqs) { - spin_unlock(&dev->n_cqs_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_cqs_allocated++; - spin_unlock(&dev->n_cqs_lock); - - if (cq->ip) { - spin_lock_irq(&dev->rdi.pending_lock); - list_add(&cq->ip->pending_mmaps, &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - - /* - * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. - * The number of entries should be >= the number requested or return - * an error. - */ - cq->dd = dd_from_dev(dev); - cq->ibcq.cqe = entries; - cq->notify = IB_CQ_NONE; - cq->triggered = 0; - spin_lock_init(&cq->lock); - init_kthread_work(&cq->comptask, send_complete); - wc->head = 0; - wc->tail = 0; - cq->queue = wc; - - ret = &cq->ibcq; - - goto done; - -bail_ip: - kfree(cq->ip); -bail_wc: - vfree(wc); -bail_cq: - kfree(cq); -done: - return ret; -} - -/** - * qib_destroy_cq - destroy a completion queue - * @ibcq: the completion queue to destroy. - * - * Returns 0 for success. - * - * Called by ib_destroy_cq() in the generic verbs code. - */ -int qib_destroy_cq(struct ib_cq *ibcq) -{ - struct qib_ibdev *dev = to_idev(ibcq->device); - struct qib_cq *cq = to_icq(ibcq); - - flush_kthread_work(&cq->comptask); - spin_lock(&dev->n_cqs_lock); - dev->n_cqs_allocated--; - spin_unlock(&dev->n_cqs_lock); - if (cq->ip) - kref_put(&cq->ip->ref, rvt_release_mmap_info); - else - vfree(cq->queue); - kfree(cq); - - return 0; -} - -/** - * qib_req_notify_cq - change the notification type for a completion queue - * @ibcq: the completion queue - * @notify_flags: the type of notification to request - * - * Returns 0 for success. - * - * This may be called from interrupt context. Also called by - * ib_req_notify_cq() in the generic verbs code. - */ -int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) -{ - struct qib_cq *cq = to_icq(ibcq); - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&cq->lock, flags); - /* - * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow - * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). - */ - if (cq->notify != IB_CQ_NEXT_COMP) - cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; - - if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && - cq->queue->head != cq->queue->tail) - ret = 1; - - spin_unlock_irqrestore(&cq->lock, flags); - - return ret; -} - -/** - * qib_resize_cq - change the size of the CQ - * @ibcq: the completion queue - * - * Returns 0 for success. 
- */ -int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) -{ - struct qib_cq *cq = to_icq(ibcq); - struct qib_cq_wc *old_wc; - struct qib_cq_wc *wc; - u32 head, tail, n; - int ret; - u32 sz; - - if (cqe < 1 || cqe > ib_qib_max_cqes) { - ret = -EINVAL; - goto bail; - } - - /* - * Need to use vmalloc() if we want to support large #s of entries. - */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); - else - sz += sizeof(struct ib_wc) * (cqe + 1); - wc = vmalloc_user(sz); - if (!wc) { - ret = -ENOMEM; - goto bail; - } - - /* Check that we can write the offset to mmap. */ - if (udata && udata->outlen >= sizeof(__u64)) { - __u64 offset = 0; - - ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (ret) - goto bail_free; - } - - spin_lock_irq(&cq->lock); - /* - * Make sure head and tail are sane since they - * might be user writable. - */ - old_wc = cq->queue; - head = old_wc->head; - if (head > (u32) cq->ibcq.cqe) - head = (u32) cq->ibcq.cqe; - tail = old_wc->tail; - if (tail > (u32) cq->ibcq.cqe) - tail = (u32) cq->ibcq.cqe; - if (head < tail) - n = cq->ibcq.cqe + 1 + head - tail; - else - n = head - tail; - if (unlikely((u32)cqe < n)) { - ret = -EINVAL; - goto bail_unlock; - } - for (n = 0; tail != head; n++) { - if (cq->ip) - wc->uqueue[n] = old_wc->uqueue[tail]; - else - wc->kqueue[n] = old_wc->kqueue[tail]; - if (tail == (u32) cq->ibcq.cqe) - tail = 0; - else - tail++; - } - cq->ibcq.cqe = cqe; - wc->head = n; - wc->tail = 0; - cq->queue = wc; - spin_unlock_irq(&cq->lock); - - vfree(old_wc); - - if (cq->ip) { - struct qib_ibdev *dev = to_idev(ibcq->device); - struct rvt_mmap_info *ip = cq->ip; - - rvt_update_mmap_info(&dev->rdi, ip, sz, wc); - - /* - * Return the offset to mmap. - * See qib_mmap() for details. 
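The resize path above first counts how many completions are still outstanding in the old ring so it can reject a new cqe that is too small; because head and tail wrap independently, the count depends on whether head has wrapped past tail. A standalone sketch of that occupancy computation (ring_used() is an illustrative name) follows, assuming both indices have already been clamped to [0, cqe] as the kernel code does for user-writable values.

/*
 * cqe is the usable capacity; the ring actually has cqe + 1 slots, so the
 * wrapped case adds cqe + 1 to head - tail.
 */
static unsigned int ring_used(unsigned int cqe, unsigned int head,
			      unsigned int tail)
{
	if (head < tail)
		return cqe + 1 + head - tail;
	return head - tail;
}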
- */ - if (udata && udata->outlen >= sizeof(__u64)) { - ret = ib_copy_to_udata(udata, &ip->offset, - sizeof(ip->offset)); - if (ret) - goto bail; - } - - spin_lock_irq(&dev->rdi.pending_lock); - if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - - ret = 0; - goto bail; - -bail_unlock: - spin_unlock_irq(&cq->lock); -bail_free: - vfree(wc); -bail: - return ret; -} - -int qib_cq_init(struct qib_devdata *dd) -{ - int ret = 0; - int cpu; - struct task_struct *task; - - if (dd->worker) - return 0; - dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL); - if (!dd->worker) - return -ENOMEM; - init_kthread_worker(dd->worker); - task = kthread_create_on_node( - kthread_worker_fn, - dd->worker, - dd->assigned_node_id, - "qib_cq%d", dd->unit); - if (IS_ERR(task)) - goto task_fail; - cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id)); - kthread_bind(task, cpu); - wake_up_process(task); -out: - return ret; -task_fail: - ret = PTR_ERR(task); - kfree(dd->worker); - dd->worker = NULL; - goto out; -} - -void qib_cq_exit(struct qib_devdata *dd) -{ - struct kthread_worker *worker; - - worker = dd->worker; - if (!worker) - return; - /* blocks future queuing from send_complete() */ - dd->worker = NULL; - smp_wmb(); - flush_kthread_worker(worker); - kthread_stop(worker->task); - kfree(worker); -} diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index a3c74bb..3f062f0 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -457,8 +457,6 @@ static int loadtime_init(struct qib_devdata *dd) init_timer(&dd->intrchk_timer); dd->intrchk_timer.function = verify_interrupt; dd->intrchk_timer.data = (unsigned long) dd; - - ret = qib_cq_init(dd); done: return ret; } @@ -1435,7 +1433,6 @@ static void cleanup_device_data(struct qib_devdata *dd) } kfree(tmp); kfree(dd->boardname); - qib_cq_exit(dd); } /* diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 83dec69..6e5a05e 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -473,7 +473,7 @@ int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err) if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; wc.status = err; - qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; @@ -496,7 +496,7 @@ int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err) wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; if (++tail >= qp->r_rq.size) tail = 0; - qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } wq->tail = tail; diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index e118004..8be5d45 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1026,7 +1026,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; wc.byte_len = wqe->length; wc.qp = &qp->ibqp; - qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); } if (++qp->s_last >= qp->s_size) qp->s_last = 0; @@ -1082,7 +1082,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; wc.byte_len = wqe->length; wc.qp = &qp->ibqp; - qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + 
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); } if (++qp->s_last >= qp->s_size) qp->s_last = 0; @@ -2048,7 +2048,7 @@ send_last: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & cpu_to_be32(IB_BTH_SOLICITED)) != 0); break; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index f7b3bb7..80f1130 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -120,7 +120,7 @@ bad_lkey: wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; /* Signal solicited completion event. */ - qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); ret = 0; bail: return ret; @@ -563,8 +563,8 @@ again: wc.sl = qp->remote_ah_attr.sl; wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ - qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - wqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, + wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: spin_lock_irqsave(&sqp->s_lock, flags); @@ -806,7 +806,7 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, wc.qp = &qp->ibqp; if (status == IB_WC_SUCCESS) wc.byte_len = wqe->length; - qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, status != IB_WC_SUCCESS); } diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index deceb45..caf0191 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -415,7 +415,7 @@ last_imm: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & cpu_to_be32(IB_BTH_SOLICITED)) != 0); break; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 76f854e..abca527 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -217,7 +217,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ - qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); ibp->rvp.n_loop_pkts++; bail_unlock: @@ -583,7 +583,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ - qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & cpu_to_be32(IB_BTH_SOLICITED)) != 0); return; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index a181502..a27166b 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1909,7 +1909,6 @@ int qib_register_ib_device(struct qib_devdata *dd) init_ibport(ppd + i); /* Only need to initialize non-zero fields. 
*/ - spin_lock_init(&dev->n_cqs_lock); spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); spin_lock_init(&dev->n_mcast_grps_lock); @@ -2021,11 +2020,11 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->post_send = qib_post_send; ibdev->post_recv = qib_post_receive; ibdev->post_srq_recv = qib_post_srq_receive; - ibdev->create_cq = qib_create_cq; - ibdev->destroy_cq = qib_destroy_cq; - ibdev->resize_cq = qib_resize_cq; - ibdev->poll_cq = qib_poll_cq; - ibdev->req_notify_cq = qib_req_notify_cq; + ibdev->create_cq = NULL; + ibdev->destroy_cq = NULL; + ibdev->resize_cq = NULL; + ibdev->poll_cq = NULL; + ibdev->req_notify_cq = NULL; ibdev->get_dma_mr = NULL; ibdev->reg_user_mr = NULL; ibdev->dereg_mr = NULL; @@ -2059,7 +2058,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; - dd->verbs_dev.rdi.flags = RVT_FLAG_CQ_INIT_DRIVER; + dd->verbs_dev.rdi.flags = 0; dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size; @@ -2070,6 +2069,10 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.dparms.qos_shift = 1; dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd); + dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; + snprintf(dd->verbs_dev.rdi.dparms.cq_name, + sizeof(dd->verbs_dev.rdi.dparms.cq_name), + "qib_cq%d", dd->unit); qib_fill_device_attr(dd); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 3383d56..818ac87 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -46,6 +46,7 @@ #include #include #include +#include struct qib_ctxtdata; struct qib_pportdata; @@ -61,12 +62,6 @@ struct qib_verbs_txreq; */ #define QIB_UVERBS_ABI_VERSION 2 -/* - * Define an ib_cq_notify value that is not valid so we know when CQ - * notifications are armed. - */ -#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) - #define IB_SEQ_NAK (3 << 29) /* AETH NAK opcode values */ @@ -220,35 +215,6 @@ struct qib_mcast { }; /* - * This structure is used to contain the head pointer, tail pointer, - * and completion queue entries as a single memory allocation so - * it can be mmap'ed into user space. - */ -struct qib_cq_wc { - u32 head; /* index of next entry to fill */ - u32 tail; /* index of next ib_poll_cq() entry */ - union { - /* these are actually size ibcq.cqe + 1 */ - struct ib_uverbs_wc uqueue[0]; - struct ib_wc kqueue[0]; - }; -}; - -/* - * The completion queue structure. - */ -struct qib_cq { - struct ib_cq ibcq; - struct kthread_work comptask; - struct qib_devdata *dd; - spinlock_t lock; /* protect changes in this struct */ - u8 notify; - u8 triggered; - struct qib_cq_wc *queue; - struct rvt_mmap_info *ip; -}; - -/* * qib specific data structure that will be hidden from rvt after the queue pair * is made common. 
*/ @@ -345,8 +311,6 @@ struct qib_ibdev { u32 n_piowait; u32 n_txwait; - u32 n_cqs_allocated; /* number of CQs allocated for device */ - spinlock_t n_cqs_lock; u32 n_qps_allocated; /* number of QPs allocated for device */ spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ @@ -375,11 +339,6 @@ struct qib_verbs_counters { u32 vl15_dropped; }; -static inline struct qib_cq *to_icq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct qib_cq, ibcq); -} - static inline struct rvt_qp *to_iqp(struct ib_qp *ibqp) { return container_of(ibqp, struct rvt_qp, ibqp); @@ -545,25 +504,6 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int qib_destroy_srq(struct ib_srq *ibsrq); -int qib_cq_init(struct qib_devdata *dd); - -void qib_cq_exit(struct qib_devdata *dd); - -void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); - -int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); - -struct ib_cq *qib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); - -int qib_destroy_cq(struct ib_cq *ibcq); - -int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); - -int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); - void mr_rcu_callback(struct rcu_head *list); static inline void qib_put_ss(struct rvt_sge_state *ss) -- cgit v0.10.2 From db3ef0eb84947e341b923c435ace2520d097d014 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 13:07:42 -0800 Subject: IB/qib: Use rdmavt version of post_send This patch removes the post_send and post_one_send from the qib driver. The "posting" of sends will be done by rdmavt which will walk a WQE and queue work. This patch will still provide the capability to schedule that work as well as kick the progress. These are provided to the rdmavt layer. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index ad41df3..a11de8e 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -377,8 +377,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, spin_lock(&qp->r_lock); /* Check for valid receive state. 
*/ - if (!(ib_qib_state_ops[qp->state] & - QIB_PROCESS_RECV_OK)) { + if (!(ib_rvt_state_ops[qp->state] & + RVT_PROCESS_RECV_OK)) { ibp->rvp.n_pkt_drops++; goto unlock; } @@ -592,8 +592,8 @@ move_along: qp->r_flags &= ~RVT_R_RSP_SEND; spin_lock_irqsave(&qp->s_lock, flags); - if (ib_qib_state_ops[qp->state] & - QIB_PROCESS_OR_FLUSH_SEND) + if (ib_rvt_state_ops[qp->state] & + RVT_PROCESS_OR_FLUSH_SEND) qib_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 6e5a05e..65b752c 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -375,7 +375,7 @@ static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) if (clr_sends) { while (qp->s_last != qp->s_head) { - struct rvt_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); unsigned i; for (i = 0; i < wqe->wr.num_sge; i++) { @@ -521,7 +521,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct qib_ibdev *dev = to_idev(ibqp->device); - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct qib_qp_priv *priv = qp->priv; enum ib_qp_state cur_state, new_state; struct ib_event ev; @@ -809,7 +809,7 @@ bail: int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); attr->qp_state = qp->state; attr->cur_qp_state = attr->qp_state; @@ -931,7 +931,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp) return ERR_PTR(-ENOMEM); } init_waitqueue_head(&priv->wait_dma); - INIT_WORK(&priv->s_work, qib_do_send); + INIT_WORK(&priv->s_work, _qib_do_send); INIT_LIST_HEAD(&priv->iowait); return priv; @@ -956,7 +956,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) */ int qib_destroy_qp(struct ib_qp *ibqp) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_qp_priv *priv = qp->priv; @@ -1095,7 +1095,7 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) struct rvt_qp *qp = iter->qp; struct qib_qp_priv *priv = qp->priv; - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); seq_printf(s, "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n", iter->n, diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 8be5d45..78ae93e 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -84,7 +84,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, u32 bth2; /* Don't send an ACK if we aren't supposed to. */ - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto bail; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -260,8 +260,8 @@ int qib_make_rc_req(struct rvt_qp *qp) qib_make_rc_ack(dev, qp, ohdr, pmtu)) goto done; - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) { - if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. 
*/ if (qp->s_last == qp->s_head) @@ -271,7 +271,7 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); /* will get called again */ @@ -295,10 +295,10 @@ int qib_make_rc_req(struct rvt_qp *qp) bth0 = 0; /* Send a request. */ - wqe = get_swqe_ptr(qp, qp->s_cur); + wqe = rvt_get_swqe_ptr(qp, qp->s_cur); switch (qp->s_state) { default: - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* * Resend an old request or start a new one. @@ -666,7 +666,7 @@ void qib_send_rc_ack(struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto unlock; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ @@ -759,7 +759,7 @@ void qib_send_rc_ack(struct rvt_qp *qp) goto done; queue_ack: - if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { this_cpu_inc(*ibp->rvp.rc_qacks); qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; @@ -786,7 +786,7 @@ done: static void reset_psn(struct rvt_qp *qp, u32 psn) { u32 n = qp->s_acked; - struct rvt_swqe *wqe = get_swqe_ptr(qp, n); + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n); u32 opcode; qp->s_cur = n; @@ -809,7 +809,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) n = 0; if (n == qp->s_tail) break; - wqe = get_swqe_ptr(qp, n); + wqe = rvt_get_swqe_ptr(qp, n); diff = qib_cmp24(psn, wqe->psn); if (diff < 0) break; @@ -870,7 +870,7 @@ done: */ static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait) { - struct rvt_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); struct qib_ibport *ibp; if (qp->s_retry == 0) { @@ -951,7 +951,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* Find the work request corresponding to the given PSN. 
*/ for (;;) { - wqe = get_swqe_ptr(qp, n); + wqe = rvt_get_swqe_ptr(qp, n); if (qib_cmp24(psn, wqe->lpsn) <= 0) { if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_sending_psn = wqe->lpsn + 1; @@ -978,7 +978,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) u32 opcode; u32 psn; - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; /* Find out where the BTH is */ @@ -1004,11 +1004,11 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) */ if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) && - (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) start_timer(qp); while (qp->s_last != qp->s_acked) { - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 && qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; @@ -1101,7 +1101,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, if (++qp->s_cur >= qp->s_size) qp->s_cur = 0; qp->s_acked = qp->s_cur; - wqe = get_swqe_ptr(qp, qp->s_cur); + wqe = rvt_get_swqe_ptr(qp, qp->s_cur); if (qp->s_acked != qp->s_tail) { qp->s_state = OP(SEND_LAST); qp->s_psn = wqe->psn; @@ -1111,7 +1111,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qp->s_acked = 0; if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur) qp->s_draining = 0; - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); } return wqe; } @@ -1152,7 +1152,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, ack_psn = psn; if (aeth >> 29) ack_psn--; - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); ibp = to_iport(qp->ibqp.device, qp->port_num); /* @@ -1361,7 +1361,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, del_timer(&qp->s_timer); } - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); while (qib_cmp24(psn, wqe->lpsn) > 0) { if (wqe->wr.opcode == IB_WR_RDMA_READ || @@ -1438,7 +1438,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, } spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto ack_done; /* Ignore invalid responses. */ @@ -1469,7 +1469,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, if (unlikely(qp->s_acked == qp->s_tail)) goto ack_done; - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); status = IB_WC_SUCCESS; switch (opcode) { @@ -1488,7 +1488,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; hdrsize += 4; - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; /* @@ -1554,7 +1554,7 @@ read_middle: * have to be careful to copy the data to the right * location. 
*/ - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, wqe, psn, pmtu); goto read_last; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 80f1130..4961a54 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -158,7 +158,7 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only) } spin_lock_irqsave(&rq->lock, flags); - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ret = 0; goto unlock; } @@ -379,7 +379,7 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) /* Return if we are already busy processing a work request. */ if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || - !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) goto unlock; sqp->s_flags |= RVT_S_BUSY; @@ -387,11 +387,11 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) again: if (sqp->s_last == sqp->s_head) goto clr_busy; - wqe = get_swqe_ptr(sqp, sqp->s_last); + wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); /* Return if it is not OK to start a new work reqeust. */ - if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) { - if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND)) + if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) { + if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND)) goto clr_busy; /* We are in the error state, flush the work request. */ send_status = IB_WC_WR_FLUSH_ERR; @@ -409,7 +409,7 @@ again: } spin_unlock_irqrestore(&sqp->s_lock, flags); - if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) || + if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) || qp->ibqp.qp_type != sqp->ibqp.qp_type) { ibp->rvp.n_pkt_drops++; /* @@ -590,7 +590,7 @@ rnr_nak: if (sqp->s_rnr_retry_cnt < 7) sqp->s_rnr_retry--; spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK)) + if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK)) goto clr_busy; sqp->s_flags |= RVT_S_WAIT_RNR; sqp->s_timer.function = qib_rc_rnr_retry; @@ -711,19 +711,26 @@ void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, this_cpu_inc(ibp->pmastats->n_unicast_xmit); } +void _qib_do_send(struct work_struct *work) +{ + struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv, + s_work); + struct rvt_qp *qp = priv->owner; + + qib_do_send(qp); +} + /** * qib_do_send - perform a send on a QP - * @work: contains a pointer to the QP + * @qp: pointer to the QP * * Process entries in the send work queue until credit or queue is * exhausted. Only allow one CPU to send a packet per QP (tasklet). * Otherwise, two threads could send packets out of order. 
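The comment in the qib_do_send() kernel-doc below states the key invariant: only one context may run the send engine for a given QP at a time, otherwise packets could leave the wire out of order. A self-contained model of that guard is a per-QP busy flag that the first caller wins and later callers simply skip, leaving the work to the current owner. In the driver the flag (RVT_S_BUSY) is manipulated under qp->s_lock rather than with a lock-free compare-and-swap; the sketch below only captures the mutual-exclusion effect, and qp_try_send()/qp_send_done() are illustrative names, not driver functions.

#include <stdatomic.h>
#include <stdbool.h>

struct qp_sketch {
	atomic_bool s_busy;	/* models the RVT_S_BUSY bit */
};

/* Returns true if this caller became the single sender for the QP. */
static bool qp_try_send(struct qp_sketch *qp)
{
	bool expected = false;

	return atomic_compare_exchange_strong(&qp->s_busy, &expected, true);
}

static void qp_send_done(struct qp_sketch *qp)
{
	atomic_store(&qp->s_busy, false);
}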
*/ -void qib_do_send(struct work_struct *work) +void qib_do_send(struct rvt_qp *qp) { - struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv, - s_work); - struct rvt_qp *qp = priv->owner; + struct qib_qp_priv *priv = qp->priv; struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct qib_pportdata *ppd = ppd_from_ibp(ibp); int (*make_req)(struct rvt_qp *qp); @@ -780,7 +787,7 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 old_last, last; unsigned i; - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; for (i = 0; i < wqe->wr.num_sge; i++) { diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 3819a6d..ae65e9f 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -672,7 +672,7 @@ unmap: spin_lock(&qp->s_lock); if (qp->ibqp.qp_type == IB_QPT_RC) { /* XXX what about error sending RDMA read responses? */ - if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) qib_error_qp(qp, IB_WC_GENERAL_ERR); } else if (qp->s_wqe) qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); @@ -685,7 +685,7 @@ busy: qp = tx->qp; priv = qp->priv; spin_lock(&qp->s_lock); - if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct qib_ibdev *dev; /* diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index caf0191..b97892f 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -57,8 +57,8 @@ int qib_make_uc_req(struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) { - if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ if (qp->s_last == qp->s_head) @@ -68,7 +68,7 @@ int qib_make_uc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; } @@ -82,12 +82,12 @@ int qib_make_uc_req(struct rvt_qp *qp) bth0 = 0; /* Get the next send request. */ - wqe = get_swqe_ptr(qp, qp->s_cur); + wqe = rvt_get_swqe_ptr(qp, qp->s_cur); qp->s_wqe = NULL; switch (qp->s_state) { default: - if (!(ib_qib_state_ops[qp->state] & - QIB_PROCESS_NEXT_SEND_OK)) + if (!(ib_rvt_state_ops[qp->state] & + RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. 
*/ if (qp->s_cur == qp->s_head) diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index abca527..f0ea002 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -72,7 +72,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) IB_QPT_UD : qp->ibqp.qp_type; if (dqptype != sqptype || - !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ibp->rvp.n_pkt_drops++; goto drop; } @@ -252,8 +252,8 @@ int qib_make_ud_req(struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) { - if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ if (qp->s_last == qp->s_head) @@ -263,7 +263,7 @@ int qib_make_ud_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; } @@ -271,7 +271,7 @@ int qib_make_ud_req(struct rvt_qp *qp) if (qp->s_cur == qp->s_head) goto bail; - wqe = get_swqe_ptr(qp, qp->s_cur); + wqe = rvt_get_swqe_ptr(qp, qp->s_cur); next_cur = qp->s_cur + 1; if (next_cur >= qp->s_size) next_cur = 0; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index a27166b..3766ea4 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -114,26 +114,6 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); /* - * Note that it is OK to post send work requests in the SQE and ERR - * states; qib_do_send() will process them and generate error - * completions as per IB 1.2 C10-96. - */ -const int ib_qib_state_ops[IB_QPS_ERR + 1] = { - [IB_QPS_RESET] = 0, - [IB_QPS_INIT] = QIB_POST_RECV_OK, - [IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK, - [IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | - QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK | - QIB_PROCESS_NEXT_SEND_OK, - [IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | - QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK, - [IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | - QIB_POST_SEND_OK | QIB_FLUSH_SEND, - [IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV | - QIB_POST_SEND_OK | QIB_FLUSH_SEND, -}; - -/* * Translate ib_wr_opcode into ib_wc_opcode. */ const enum ib_wc_opcode ib_qib_wc_opcode[] = { @@ -321,179 +301,7 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) } /** - * qib_post_one_send - post one RC, UC, or UD send work request - * @qp: the QP to post on - * @wr: the work request to send - */ -static int qib_post_one_send(struct rvt_qp *qp, struct ib_send_wr *wr, - int *scheduled) -{ - struct rvt_swqe *wqe; - u32 next; - int i; - int j; - int acc; - int ret; - unsigned long flags; - struct rvt_lkey_table *rkt; - struct rvt_pd *pd; - int avoid_schedule = 0; - - spin_lock_irqsave(&qp->s_lock, flags); - - /* Check that state is OK to post send. */ - if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK))) - goto bail_inval; - - /* IB spec says that num_sge == 0 is OK. */ - if (wr->num_sge > qp->s_max_sge) - goto bail_inval; - - /* - * Don't allow RDMA reads or atomic operations on UC or - * undefined operations. 
- * Make sure buffer is large enough to hold the result for atomics. - */ - if (qp->ibqp.qp_type == IB_QPT_UC) { - if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) - goto bail_inval; - } else if (qp->ibqp.qp_type != IB_QPT_RC) { - /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ - if (wr->opcode != IB_WR_SEND && - wr->opcode != IB_WR_SEND_WITH_IMM) - goto bail_inval; - /* Check UD destination address PD */ - if (qp->ibqp.pd != ud_wr(wr)->ah->pd) - goto bail_inval; - } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) - goto bail_inval; - else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && - (wr->num_sge == 0 || - wr->sg_list[0].length < sizeof(u64) || - wr->sg_list[0].addr & (sizeof(u64) - 1))) - goto bail_inval; - else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) - goto bail_inval; - - next = qp->s_head + 1; - if (next >= qp->s_size) - next = 0; - if (next == qp->s_last) { - ret = -ENOMEM; - goto bail; - } - - rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; - pd = ibpd_to_rvtpd(qp->ibqp.pd); - wqe = get_swqe_ptr(qp, qp->s_head); - - if (qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_RC) - memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); - else if (wr->opcode == IB_WR_REG_MR) - memcpy(&wqe->reg_wr, reg_wr(wr), - sizeof(wqe->reg_wr)); - else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE || - wr->opcode == IB_WR_RDMA_READ) - memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); - else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); - else - memcpy(&wqe->wr, wr, sizeof(wqe->wr)); - - wqe->length = 0; - j = 0; - if (wr->num_sge) { - acc = wr->opcode >= IB_WR_RDMA_READ ? - IB_ACCESS_LOCAL_WRITE : 0; - for (i = 0; i < wr->num_sge; i++) { - u32 length = wr->sg_list[i].length; - int ok; - - if (length == 0) - continue; - ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], - &wr->sg_list[i], acc); - if (!ok) - goto bail_inval_free; - wqe->length += length; - j++; - } - wqe->wr.num_sge = j; - } - if (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) { - if (wqe->length > 0x80000000U) - goto bail_inval_free; - if (wqe->length <= qp->pmtu) - avoid_schedule = 1; - } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport + - qp->port_num - 1)->ibmtu) { - goto bail_inval_free; - } else { - atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); - avoid_schedule = 1; - } - wqe->ssn = qp->s_ssn++; - qp->s_head = next; - - ret = 0; - goto bail; - -bail_inval_free: - while (j) { - struct rvt_sge *sge = &wqe->sg_list[--j]; - rvt_put_mr(sge->mr); - } -bail_inval: - ret = -EINVAL; -bail: - if (!ret && !wr->next && !avoid_schedule && - !qib_sdma_empty( - dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) { - qib_schedule_send(qp); - *scheduled = 1; - } - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * qib_post_send - post a send on a QP - * @ibqp: the QP to post the send on - * @wr: the list of work requests to post - * @bad_wr: the first bad WR is put here - * - * This may be called from interrupt context. 
- */ -static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) -{ - struct rvt_qp *qp = to_iqp(ibqp); - struct qib_qp_priv *priv = qp->priv; - int err = 0; - int scheduled = 0; - - for (; wr; wr = wr->next) { - err = qib_post_one_send(qp, wr, &scheduled); - if (err) { - *bad_wr = wr; - goto bail; - } - } - - /* Try to do the send work in the caller's context. */ - if (!scheduled) - qib_do_send(&priv->s_work); - -bail: - return err; -} - -/** * qib_post_receive - post a receive on a QP * @ibqp: the QP to post the receive on * @wr: the WR to post @@ -504,13 +312,13 @@ bail: static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_rwq *wq = qp->r_rq.wq; unsigned long flags; int ret; /* Check that state is OK to post receive. */ - if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) { + if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { *bad_wr = wr; ret = -EINVAL; goto bail; @@ -575,7 +383,7 @@ static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, spin_lock(&qp->r_lock); /* Check for valid receive state. */ - if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ibp->rvp.n_pkt_drops++; goto unlock; } @@ -955,7 +763,7 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, spin_unlock_irqrestore(&qp->s_lock, flags); tx = list_entry(l, struct qib_verbs_txreq, txreq.list); } else { - if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK && + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK && list_empty(&priv->iowait)) { dev->n_txwait++; qp->s_flags |= RVT_S_WAIT_TX; @@ -1136,7 +944,7 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) int ret = 0; spin_lock_irqsave(&qp->s_lock, flags); - if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { spin_lock(&dev->rdi.pending_lock); if (list_empty(&priv->iowait)) { if (list_empty(&dev->memwait)) @@ -1273,7 +1081,7 @@ static int no_bufs_available(struct rvt_qp *qp) * enabling the PIO avail interrupt. 
*/ spin_lock_irqsave(&qp->s_lock, flags); - if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { spin_lock(&dev->rdi.pending_lock); if (list_empty(&priv->iowait)) { dev->n_piowait++; @@ -2017,7 +1825,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->modify_qp = qib_modify_qp; ibdev->query_qp = qib_query_qp; ibdev->destroy_qp = qib_destroy_qp; - ibdev->post_send = qib_post_send; + ibdev->post_send = NULL; ibdev->post_recv = qib_post_receive; ibdev->post_srq_recv = qib_post_srq_receive; ibdev->create_cq = NULL; @@ -2057,6 +1865,8 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; + dd->verbs_dev.rdi.driver_f.do_send = qib_do_send; + dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send; dd->verbs_dev.rdi.flags = 0; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 818ac87..71c8db4 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -72,17 +72,6 @@ struct qib_verbs_txreq; #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 #define IB_NAK_INVALID_RD_REQUEST 0x64 -/* Flags for checking QP state (see ib_qib_state_ops[]) */ -#define QIB_POST_SEND_OK 0x01 -#define QIB_POST_RECV_OK 0x02 -#define QIB_PROCESS_RECV_OK 0x04 -#define QIB_PROCESS_SEND_OK 0x08 -#define QIB_PROCESS_NEXT_SEND_OK 0x10 -#define QIB_FLUSH_SEND 0x20 -#define QIB_FLUSH_RECV 0x40 -#define QIB_PROCESS_OR_FLUSH_SEND \ - (QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND) - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -231,19 +220,6 @@ struct qib_qp_priv { #define QIB_PSN_CREDIT 16 /* - * Since struct rvt_swqe is not a fixed size, we can't simply index into - * struct rvt_qp.s_wq. This function does the array index computation. - */ -static inline struct rvt_swqe *get_swqe_ptr(struct rvt_qp *qp, - unsigned n) -{ - return (struct rvt_swqe *)((char *)qp->s_wq + - (sizeof(struct rvt_swqe) + - qp->s_max_sge * - sizeof(struct rvt_sge)) * n); -} - -/* * Since struct rvt_rwqe is not a fixed size, we can't simply index into * struct rvt_rwq.wq. This function does the array index computation. 
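As the preceding comment notes, a work queue entry carries a caller-dependent number of SGEs, so the queue is an array of variable-size records and element n lives at base + n * stride rather than at an ordinary array index. A generic sketch of that computation follows; wqe_sketch, sge_sketch and wqe_ptr() are illustrative stand-ins for the rvt definitions, not the actual helpers.

#include <stddef.h>

struct sge_sketch { unsigned long addr; unsigned int length, lkey; };

struct wqe_sketch {
	unsigned long wr_id;
	unsigned char num_sge;
	struct sge_sketch sg_list[];	/* max_sge entries per slot */
};

static struct wqe_sketch *wqe_ptr(void *queue, unsigned int max_sge,
				  unsigned int n)
{
	size_t stride = sizeof(struct wqe_sketch) +
			max_sge * sizeof(struct sge_sketch);

	return (struct wqe_sketch *)((char *)queue + stride * n);
}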
*/ @@ -339,11 +315,6 @@ struct qib_verbs_counters { u32 vl15_dropped; }; -static inline struct rvt_qp *to_iqp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct rvt_qp, ibqp); -} - static inline struct qib_ibdev *to_idev(struct ib_device *ibdev) { struct rvt_dev_info *rdi; @@ -528,7 +499,9 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, u32 bth0, u32 bth2); -void qib_do_send(struct work_struct *work); +void _qib_do_send(struct work_struct *work); + +void qib_do_send(struct rvt_qp *qp); void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); @@ -573,7 +546,7 @@ extern const enum ib_wc_opcode ib_qib_wc_opcode[]; #define IB_PHYSPORTSTATE_CFG_ENH 0x10 #define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13 -extern const int ib_qib_state_ops[]; +extern const int ib_rvt_state_ops[]; extern __be64 ib_qib_sys_image_guid; /* in network order */ diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c index c3d6535..cf5b88d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -224,7 +224,7 @@ bail: int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp; struct qib_mcast *mcast; @@ -282,7 +282,7 @@ bail: int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct qib_mcast *mcast = NULL; -- cgit v0.10.2 From a7d34a47f212ae6bd7f4748aebcc4f1192a048d1 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 13:07:49 -0800 Subject: IB/qib: Remove qib_post_receive and use rdmavt version This patch removes the simple post recv function in favor of using rdmavt. The packet receive processing still lives in the driver though. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 3766ea4..add899b 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -301,68 +301,6 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) } /** - - * qib_post_receive - post a receive on a QP - * @ibqp: the QP to post the receive on - * @wr: the WR to post - * @bad_wr: the first bad WR is put here - * - * This may be called from interrupt context. - */ -static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct rvt_rwq *wq = qp->r_rq.wq; - unsigned long flags; - int ret; - - /* Check that state is OK to post receive. 
*/ - if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - for (; wr; wr = wr->next) { - struct rvt_rwqe *wqe; - u32 next; - int i; - - if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&qp->r_rq.lock, flags); - next = wq->head + 1; - if (next >= qp->r_rq.size) - next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - *bad_wr = wr; - ret = -ENOMEM; - goto bail; - } - - wqe = get_rwqe_ptr(&qp->r_rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - } - ret = 0; - -bail: - return ret; -} - -/** * qib_qp_rcv - processing an incoming packet on a QP * @rcd: the context pointer * @hdr: the packet header @@ -1826,7 +1764,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->query_qp = qib_query_qp; ibdev->destroy_qp = qib_destroy_qp; ibdev->post_send = NULL; - ibdev->post_recv = qib_post_receive; + ibdev->post_recv = NULL; ibdev->post_srq_recv = qib_post_srq_receive; ibdev->create_cq = NULL; ibdev->destroy_cq = NULL; -- cgit v0.10.2 From 18f6c582b366d3ec76317458f498e24a4379c299 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 13:07:55 -0800 Subject: IB/qib: Remove qib multicast verbs functions Multicast is now supported by rdmavt. Remove the verbs multicast functions and use that. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index d78f688..8d5e36b 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -5,7 +5,7 @@ ib_qib-y := qib_diag.o qib_driver.o qib_eeprom.o \ qib_mad.o qib_pcie.o qib_pio_copy.o \ qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \ - qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \ + qib_user_pages.o qib_user_sdma.o qib_iba7220.o \ qib_sd7220.o qib_iba7322.o qib_verbs.o # 6120 has no fallback if no MSI interrupts, others can do INTx diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 65b752c..685b0bb 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -310,8 +310,6 @@ unsigned qib_free_all_qps(struct rvt_dev_info *rdi) for (n = 0; n < dd->num_pports; n++) { struct qib_ibport *ibp = &dd->pport[n].ibport_data; - if (!qib_mcast_tree_empty(ibp)) - qp_inuse++; rcu_read_lock(); if (rcu_dereference(ibp->rvp.qp[0])) qp_inuse++; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index add899b..cbf5f88 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -411,19 +411,19 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) /* Get the destination QP number. 
*/ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; if (qp_num == QIB_MULTICAST_QPN) { - struct qib_mcast *mcast; - struct qib_mcast_qp *p; + struct rvt_mcast *mcast; + struct rvt_mcast_qp *p; if (lnh != QIB_LRH_GRH) goto drop; - mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); + mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid); if (mcast == NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); /* - * Notify qib_multicast_detach() if it is waiting for us + * Notify rvt_multicast_detach() if it is waiting for us * to finish. */ if (atomic_dec_return(&mcast->refcount) <= 1) @@ -1657,7 +1657,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* Only need to initialize non-zero fields. */ spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); - spin_lock_init(&dev->n_mcast_grps_lock); init_timer(&dev->mem_timer); dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; @@ -1780,8 +1779,8 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->map_phys_fmr = NULL; ibdev->unmap_fmr = NULL; ibdev->dealloc_fmr = NULL; - ibdev->attach_mcast = qib_multicast_attach; - ibdev->detach_mcast = qib_multicast_detach; + ibdev->attach_mcast = NULL; + ibdev->detach_mcast = NULL; ibdev->process_mad = qib_process_mad; ibdev->mmap = NULL; ibdev->dma_ops = NULL; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 71c8db4..e3610df 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -185,25 +185,6 @@ struct qib_pio_header { } __packed; /* - * There is one struct qib_mcast for each multicast GID. - * All attached QPs are then stored as a list of - * struct qib_mcast_qp. - */ -struct qib_mcast_qp { - struct list_head list; - struct rvt_qp *qp; -}; - -struct qib_mcast { - struct rb_node rb_node; - union ib_gid mgid; - struct list_head qp_list; - wait_queue_head_t wait; - atomic_t refcount; - int n_attached; -}; - -/* * qib specific data structure that will be hidden from rvt after the queue pair * is made common. 
*/ @@ -291,8 +272,6 @@ struct qib_ibdev { spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ spinlock_t n_srqs_lock; - u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ - spinlock_t n_mcast_grps_lock; #ifdef CONFIG_DEBUG_FS /* per HCA debugfs */ struct dentry *qib_ibdev_dbg; @@ -373,8 +352,6 @@ static inline int qib_cmp24(u32 a, u32 b) return (((int) a) - ((int) b)) << 8; } -struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid); - int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, u64 *rwords, u64 *spkts, u64 *rpkts, u64 *xmit_wait); @@ -382,12 +359,6 @@ int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, int qib_get_counters(struct qib_pportdata *ppd, struct qib_verbs_counters *cntrs); -int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); - -int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); - -int qib_mcast_tree_empty(struct qib_ibport *ibp); - __be32 qib_compute_aeth(struct rvt_qp *qp); struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn); diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c deleted file mode 100644 index cf5b88d..0000000 --- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include - -#include "qib.h" - -/** - * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct - * @qp: the QP to link - */ -static struct qib_mcast_qp *qib_mcast_qp_alloc(struct rvt_qp *qp) -{ - struct qib_mcast_qp *mqp; - - mqp = kmalloc(sizeof(*mqp), GFP_KERNEL); - if (!mqp) - goto bail; - - mqp->qp = qp; - atomic_inc(&qp->refcount); - -bail: - return mqp; -} - -static void qib_mcast_qp_free(struct qib_mcast_qp *mqp) -{ - struct rvt_qp *qp = mqp->qp; - - /* Notify qib_destroy_qp() if it is waiting. 
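The free path below relies on a rendezvous this multicast code uses throughout: every reference holder does a decrement-and-test, and whoever drops the last reference wakes the thread waiting to tear the object down. A userspace model of that handshake with C11 atomics and a pthread condition variable is sketched here; obj_sketch, obj_put() and obj_wait_unused() are illustrative names, and the kernel equivalents are atomic_dec_and_test()/wake_up() on the put side and a wait on the object's wait queue on the destroy side.

#include <pthread.h>
#include <stdatomic.h>

struct obj_sketch {
	atomic_int refcount;
	pthread_mutex_t lock;
	pthread_cond_t unused;
};

/* Last put signals the waiter, mirroring atomic_dec_and_test() + wake_up(). */
static void obj_put(struct obj_sketch *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1) {
		pthread_mutex_lock(&o->lock);
		pthread_cond_signal(&o->unused);
		pthread_mutex_unlock(&o->lock);
	}
}

/* The destroyer blocks until no references remain. */
static void obj_wait_unused(struct obj_sketch *o)
{
	pthread_mutex_lock(&o->lock);
	while (atomic_load(&o->refcount) != 0)
		pthread_cond_wait(&o->unused, &o->lock);
	pthread_mutex_unlock(&o->lock);
}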
*/ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - - kfree(mqp); -} - -/** - * qib_mcast_alloc - allocate the multicast GID structure - * @mgid: the multicast GID - * - * A list of QPs will be attached to this structure. - */ -static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid) -{ - struct qib_mcast *mcast; - - mcast = kmalloc(sizeof(*mcast), GFP_KERNEL); - if (!mcast) - goto bail; - - mcast->mgid = *mgid; - INIT_LIST_HEAD(&mcast->qp_list); - init_waitqueue_head(&mcast->wait); - atomic_set(&mcast->refcount, 0); - mcast->n_attached = 0; - -bail: - return mcast; -} - -static void qib_mcast_free(struct qib_mcast *mcast) -{ - struct qib_mcast_qp *p, *tmp; - - list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) - qib_mcast_qp_free(p); - - kfree(mcast); -} - -/** - * qib_mcast_find - search the global table for the given multicast GID - * @ibp: the IB port structure - * @mgid: the multicast GID to search for - * - * Returns NULL if not found. - * - * The caller is responsible for decrementing the reference count if found. - */ -struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid) -{ - struct rb_node *n; - unsigned long flags; - struct qib_mcast *mcast; - - spin_lock_irqsave(&ibp->rvp.lock, flags); - n = ibp->rvp.mcast_tree.rb_node; - while (n) { - int ret; - - mcast = rb_entry(n, struct qib_mcast, rb_node); - - ret = memcmp(mgid->raw, mcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) - n = n->rb_left; - else if (ret > 0) - n = n->rb_right; - else { - atomic_inc(&mcast->refcount); - spin_unlock_irqrestore(&ibp->rvp.lock, flags); - goto bail; - } - } - spin_unlock_irqrestore(&ibp->rvp.lock, flags); - - mcast = NULL; - -bail: - return mcast; -} - -/** - * qib_mcast_add - insert mcast GID into table and attach QP struct - * @mcast: the mcast GID table - * @mqp: the QP to attach - * - * Return zero if both were added. Return EEXIST if the GID was already in - * the table but the QP was added. Return ESRCH if the QP was already - * attached and neither structure was added. - */ -static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp, - struct qib_mcast *mcast, struct qib_mcast_qp *mqp) -{ - struct rb_node **n = &ibp->rvp.mcast_tree.rb_node; - struct rb_node *pn = NULL; - int ret; - - spin_lock_irq(&ibp->rvp.lock); - - while (*n) { - struct qib_mcast *tmcast; - struct qib_mcast_qp *p; - - pn = *n; - tmcast = rb_entry(pn, struct qib_mcast, rb_node); - - ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) { - n = &pn->rb_left; - continue; - } - if (ret > 0) { - n = &pn->rb_right; - continue; - } - - /* Search the QP list to see if this is already there. 
*/ - list_for_each_entry_rcu(p, &tmcast->qp_list, list) { - if (p->qp == mqp->qp) { - ret = ESRCH; - goto bail; - } - } - if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) { - ret = ENOMEM; - goto bail; - } - - tmcast->n_attached++; - - list_add_tail_rcu(&mqp->list, &tmcast->qp_list); - ret = EEXIST; - goto bail; - } - - spin_lock(&dev->n_mcast_grps_lock); - if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) { - spin_unlock(&dev->n_mcast_grps_lock); - ret = ENOMEM; - goto bail; - } - - dev->n_mcast_grps_allocated++; - spin_unlock(&dev->n_mcast_grps_lock); - - mcast->n_attached++; - - list_add_tail_rcu(&mqp->list, &mcast->qp_list); - - atomic_inc(&mcast->refcount); - rb_link_node(&mcast->rb_node, pn, n); - rb_insert_color(&mcast->rb_node, &ibp->rvp.mcast_tree); - - ret = 0; - -bail: - spin_unlock_irq(&ibp->rvp.lock); - - return ret; -} - -int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct qib_ibdev *dev = to_idev(ibqp->device); - struct qib_ibport *ibp; - struct qib_mcast *mcast; - struct qib_mcast_qp *mqp; - int ret; - - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { - ret = -EINVAL; - goto bail; - } - - /* - * Allocate data structures since its better to do this outside of - * spin locks and it will most likely be needed. - */ - mcast = qib_mcast_alloc(gid); - if (mcast == NULL) { - ret = -ENOMEM; - goto bail; - } - mqp = qib_mcast_qp_alloc(qp); - if (mqp == NULL) { - qib_mcast_free(mcast); - ret = -ENOMEM; - goto bail; - } - ibp = to_iport(ibqp->device, qp->port_num); - switch (qib_mcast_add(dev, ibp, mcast, mqp)) { - case ESRCH: - /* Neither was used: OK to attach the same QP twice. */ - qib_mcast_qp_free(mqp); - qib_mcast_free(mcast); - break; - - case EEXIST: /* The mcast wasn't used */ - qib_mcast_free(mcast); - break; - - case ENOMEM: - /* Exceeded the maximum number of mcast groups. */ - qib_mcast_qp_free(mqp); - qib_mcast_free(mcast); - ret = -ENOMEM; - goto bail; - - default: - break; - } - - ret = 0; - -bail: - return ret; -} - -int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct qib_ibdev *dev = to_idev(ibqp->device); - struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); - struct qib_mcast *mcast = NULL; - struct qib_mcast_qp *p, *tmp, *delp = NULL; - struct rb_node *n; - int last = 0; - int ret; - - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) - return -EINVAL; - - spin_lock_irq(&ibp->rvp.lock); - - /* Find the GID in the mcast table. */ - n = ibp->rvp.mcast_tree.rb_node; - while (1) { - if (n == NULL) { - spin_unlock_irq(&ibp->rvp.lock); - return -EINVAL; - } - - mcast = rb_entry(n, struct qib_mcast, rb_node); - ret = memcmp(gid->raw, mcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) - n = n->rb_left; - else if (ret > 0) - n = n->rb_right; - else - break; - } - - /* Search the QP list. */ - list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { - if (p->qp != qp) - continue; - /* - * We found it, so remove it, but don't poison the forward - * link until we are sure there are no list walkers. - */ - list_del_rcu(&p->list); - mcast->n_attached--; - delp = p; - - /* If this was the last attached QP, remove the GID too. 
*/ - if (list_empty(&mcast->qp_list)) { - rb_erase(&mcast->rb_node, &ibp->rvp.mcast_tree); - last = 1; - } - break; - } - - spin_unlock_irq(&ibp->rvp.lock); - /* QP not attached */ - if (!delp) - return -EINVAL; - /* - * Wait for any list walkers to finish before freeing the - * list element. - */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - qib_mcast_qp_free(delp); - - if (last) { - atomic_dec(&mcast->refcount); - wait_event(mcast->wait, !atomic_read(&mcast->refcount)); - qib_mcast_free(mcast); - spin_lock_irq(&dev->n_mcast_grps_lock); - dev->n_mcast_grps_allocated--; - spin_unlock_irq(&dev->n_mcast_grps_lock); - } - return 0; -} - -int qib_mcast_tree_empty(struct qib_ibport *ibp) -{ - return !(ibp->rvp.mcast_tree.rb_node); -} -- cgit v0.10.2 From 034a3e7079aabc028783755d0ea1406fe9453d52 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 13:08:01 -0800 Subject: IB/qib: Remove qib_query_qp function Rely on rvt_query_qp function defined in rdmavt Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 685b0bb..ce9002f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -804,56 +804,6 @@ bail: return ret; } -int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_qp_init_attr *init_attr) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - - attr->qp_state = qp->state; - attr->cur_qp_state = attr->qp_state; - attr->path_mtu = qp->path_mtu; - attr->path_mig_state = qp->s_mig_state; - attr->qkey = qp->qkey; - attr->rq_psn = qp->r_psn & QIB_PSN_MASK; - attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK; - attr->dest_qp_num = qp->remote_qpn; - attr->qp_access_flags = qp->qp_access_flags; - attr->cap.max_send_wr = qp->s_size - 1; - attr->cap.max_recv_wr = qp->ibqp.srq ? 
0 : qp->r_rq.size - 1; - attr->cap.max_send_sge = qp->s_max_sge; - attr->cap.max_recv_sge = qp->r_rq.max_sge; - attr->cap.max_inline_data = 0; - attr->ah_attr = qp->remote_ah_attr; - attr->alt_ah_attr = qp->alt_ah_attr; - attr->pkey_index = qp->s_pkey_index; - attr->alt_pkey_index = qp->s_alt_pkey_index; - attr->en_sqd_async_notify = 0; - attr->sq_draining = qp->s_draining; - attr->max_rd_atomic = qp->s_max_rd_atomic; - attr->max_dest_rd_atomic = qp->r_max_rd_atomic; - attr->min_rnr_timer = qp->r_min_rnr_timer; - attr->port_num = qp->port_num; - attr->timeout = qp->timeout; - attr->retry_cnt = qp->s_retry_cnt; - attr->rnr_retry = qp->s_rnr_retry_cnt; - attr->alt_port_num = qp->alt_ah_attr.port_num; - attr->alt_timeout = qp->alt_timeout; - - init_attr->event_handler = qp->ibqp.event_handler; - init_attr->qp_context = qp->ibqp.qp_context; - init_attr->send_cq = qp->ibqp.send_cq; - init_attr->recv_cq = qp->ibqp.recv_cq; - init_attr->srq = qp->ibqp.srq; - init_attr->cap = attr->cap; - if (qp->s_flags & RVT_S_SIGNAL_REQ_WR) - init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; - else - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->qp_type = qp->ibqp.qp_type; - init_attr->port_num = qp->port_num; - return 0; -} - /** * qib_compute_aeth - compute the AETH (syndrome + MSN) * @qp: the queue pair to compute the AETH for diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index cbf5f88..8b97ca1 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1760,7 +1760,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->destroy_srq = qib_destroy_srq; ibdev->create_qp = NULL; ibdev->modify_qp = qib_modify_qp; - ibdev->query_qp = qib_query_qp; + ibdev->query_qp = NULL; ibdev->destroy_qp = qib_destroy_qp; ibdev->post_send = NULL; ibdev->post_recv = NULL; @@ -1814,6 +1814,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */ dd->verbs_dev.rdi.dparms.qpn_inc = 1; dd->verbs_dev.rdi.dparms.qos_shift = 1; + dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK; dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd); dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index e3610df..34f7784 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -369,9 +369,6 @@ int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err); int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); - -int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_qp_init_attr *init_attr); /* * Functions provided by qib driver for rdmavt to use */ -- cgit v0.10.2 From b8f881b913f34f712185b2ff7a41645dcad9a868 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 3 Feb 2016 14:14:36 -0800 Subject: IB/rdmavt: Add srq functionality to rdmavt Fill in srq function stubs with code derived from hfi1 and qib. Move necessary functions and data structure members as well. 
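For reference: the SRQ receive ring that rvt_post_srq_recv() manages treats the work queue as a circular buffer that always leaves one slot unused, so the queue is full when advancing the head would land on the tail. The stand-alone sketch below only illustrates that wrap/full test; it is not part of this patch, and rwq_would_overflow() is a made-up name.

#include <linux/types.h>

/*
 * Sketch of the check rvt_post_srq_recv() performs before writing a WQE
 * at wq->head: advance the head by one, wrap at the ring size, and
 * refuse the post if the new head would collide with the tail.
 */
static bool rwq_would_overflow(u32 head, u32 tail, u32 size)
{
	u32 next = head + 1;

	if (next >= size)
		next = 0;
	return next == tail;	/* one slot always stays free */
}

The real code additionally issues smp_wmb() before publishing the new head so the WQE contents are visible before the index update.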
Reviewed-by: Dennis Dalessandro Reviewed-by: Harish Chegondi Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 354fdac..4711e14 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1517,7 +1517,42 @@ bail: int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_rwq *wq; + unsigned long flags; + + for (; wr; wr = wr->next) { + struct rvt_rwqe *wqe; + u32 next; + int i; + + if ((unsigned)wr->num_sge > srq->rq.max_sge) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&srq->rq.lock, flags); + wq = srq->rq.wq; + next = wq->head + 1; + if (next >= srq->rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&srq->rq.lock, flags); + *bad_wr = wr; + return -ENOMEM; + } + + wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&srq->rq.lock, flags); + } + return 0; } void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index c9eb8b3..4960a89 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -45,8 +45,21 @@ * */ +#include +#include +#include + #include "srq.h" +/* + * Do any initialization needed when a driver registers with rdmavt. + */ +void rvt_driver_srq_init(struct rvt_dev_info *rdi) +{ + spin_lock_init(&rdi->n_srqs_lock); + rdi->n_srqs_allocated = 0; +} + /** * rvt_create_srq - create a shared receive queue * @ibpd: the protection domain of the SRQ to create @@ -57,7 +70,96 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); + struct rvt_srq *srq; + u32 sz; + struct ib_srq *ret; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + + if (srq_init_attr->attr.max_sge == 0 || + srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || + srq_init_attr->attr.max_wr == 0 || + srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr) + return ERR_PTR(-EINVAL); + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + srq->rq.size = srq_init_attr->attr.max_wr + 1; + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + + sizeof(struct rvt_rwqe); + srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz); + if (!srq->rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_srq; + } + + /* + * Return the address of the RWQ as the offset to mmap. + * See rvt_mmap() for details. 
+ */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; + + srq->ip = + rvt_create_mmap_info(dev, s, ibpd->uobject->context, + srq->rq.wq); + if (!srq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + err = ib_copy_to_udata(udata, &srq->ip->offset, + sizeof(srq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else { + srq->ip = NULL; + } + + /* + * ib_create_srq() will initialize srq->ibsrq. + */ + spin_lock_init(&srq->rq.lock); + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; + srq->limit = srq_init_attr->attr.srq_limit; + + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == dev->dparms.props.max_srq) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); + + if (srq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + return &srq->ibsrq; + +bail_ip: + kfree(srq->ip); +bail_wq: + vfree(srq->rq.wq); +bail_srq: + kfree(srq); + return ret; } /** @@ -71,16 +173,161 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); + struct rvt_rwq *wq; + int ret = 0; + + if (attr_mask & IB_SRQ_MAX_WR) { + struct rvt_rwq *owq; + struct rvt_rwqe *p; + u32 sz, size, n, head, tail; + + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > dev->dparms.props.max_srq_wr) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) + return -EINVAL; + + sz = sizeof(struct rvt_rwqe) + + srq->rq.max_sge * sizeof(struct ib_sge); + size = attr->max_wr + 1; + wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz); + if (!wq) + return -ENOMEM; + + /* Check that we can write the offset to mmap. */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = 0; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) + goto bail_free; + udata->outbuf = (void __user *) + (unsigned long)offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head and tail pointer values and compute + * the number of remaining WQEs. 
+ */ + owq = srq->rq.wq; + head = owq->head; + tail = owq->tail; + if (head >= srq->rq.size || tail >= srq->rq.size) { + ret = -EINVAL; + goto bail_unlock; + } + n = head; + if (n < tail) + n += srq->rq.size - tail; + else + n -= tail; + if (size <= n) { + ret = -EINVAL; + goto bail_unlock; + } + n = 0; + p = wq->wq; + while (tail != head) { + struct rvt_rwqe *wqe; + int i; + + wqe = rvt_get_rwqe_ptr(&srq->rq, tail); + p->wr_id = wqe->wr_id; + p->num_sge = wqe->num_sge; + for (i = 0; i < wqe->num_sge; i++) + p->sg_list[i] = wqe->sg_list[i]; + n++; + p = (struct rvt_rwqe *)((char *)p + sz); + if (++tail >= srq->rq.size) + tail = 0; + } + srq->rq.wq = wq; + srq->rq.size = size; + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + + vfree(owq); + + if (srq->ip) { + struct rvt_mmap_info *ip = srq->ip; + struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device); + u32 s = sizeof(struct rvt_rwq) + size * sz; + + rvt_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See rvt_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + return ret; + } + + /* + * Put user mapping info onto the pending list + * unless it already is on the list. + */ + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, + &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + } + return ret; + +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); + return ret; } int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + + attr->max_wr = srq->rq.size - 1; + attr->max_sge = srq->rq.max_sge; + attr->srq_limit = srq->limit; + return 0; } int rvt_destroy_srq(struct ib_srq *ibsrq) { - return -EOPNOTSUPP; -} + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); + spin_lock(&dev->n_srqs_lock); + dev->n_srqs_allocated--; + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, rvt_release_mmap_info); + else + vfree(srq->rq.wq); + kfree(srq); + + return 0; +} diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 9f07880..bf0eaaf 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -49,6 +49,7 @@ */ #include +void rvt_driver_srq_init(struct rvt_dev_info *rdi); struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 571463e..d45206c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -323,6 +323,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_srq); CHECK_DRIVER_OVERRIDE(rdi, destroy_srq); CHECK_DRIVER_OVERRIDE(rdi, query_srq); + rvt_driver_srq_init(rdi); /* Multicast */ rvt_driver_mcast_init(rdi); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index f6569b2..1b77065 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -299,6 +299,9 @@ struct rvt_dev_info { int n_ahs_allocated; spinlock_t 
n_ahs_lock; /* Protect ah allocated count */ + u32 n_srqs_allocated; + spinlock_t n_srqs_lock; /* Protect srqs allocated count */ + int flags; struct rvt_ibport **ports; -- cgit v0.10.2 From 60c30f572595e46c819503b5a8c3a8e2f922de7a Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 3 Feb 2016 14:14:45 -0800 Subject: IB/rdmavt: Add hardware driver send work request check Some hardware drivers requires additional checks on send WRs. Create an optional call back to allow hardware drivers to reject a send WR. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 4711e14..e9e3138 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1394,6 +1394,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) if (next == qp->s_last) return -ENOMEM; + if (rdi->driver_f.check_send_wr && + rdi->driver_f.check_send_wr(qp, wr)) + return -EINVAL; + rkt = &rdi->lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = rvt_get_swqe_ptr(qp, qp->s_head); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1b77065..52dfa9c 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -265,6 +265,8 @@ struct rvt_driver_provided { void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr); + void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); }; -- cgit v0.10.2 From d1b697b678cd591e12c493a9b91343107816cceb Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:14:54 -0800 Subject: IB/rdmavt: Add Mem affinity support Change verbs memory allocations to the device numa node. This keeps memory close to the device for optimal performance. 
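The conversion is mechanical: node-agnostic allocators are replaced with their _node variants, passing the NUMA node recorded in rdi->dparms.node. A minimal sketch of the pattern follows; it is illustrative only and not taken from the patch, and struct example_state and the helper name are invented.

#include <linux/slab.h>
#include <rdma/rdma_vt.h>

struct example_state {
	int dummy;
};

/* Allocate per-device state on the NUMA node closest to the device. */
static struct example_state *alloc_state_near_device(struct rvt_dev_info *rdi)
{
	return kzalloc_node(sizeof(struct example_state), GFP_KERNEL,
			    rdi->dparms.node);
}

Large buffers follow the same pattern with vmalloc_node() in place of vmalloc().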
Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index d6330d7..49180c4 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -157,7 +157,7 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, { struct rvt_mmap_info *ip; - ip = kmalloc(sizeof(*ip), GFP_KERNEL); + ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node); if (!ip) return ip; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index ee36be3..8bff6bb 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -87,7 +87,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) } lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); rdi->lkey_table.table = (struct rvt_mregion __rcu **) - vmalloc(lk_tab_size); + vmalloc_node(lk_tab_size, rdi->dparms.node); if (!rdi->lkey_table.table) return -ENOMEM; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e9e3138..471d9c5 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -186,7 +186,8 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) return -EINVAL; /* allocate parent object */ - rdi->qp_dev = kzalloc(sizeof(*rdi->qp_dev), GFP_KERNEL); + rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL, + rdi->dparms.node); if (!rdi->qp_dev) return -ENOMEM; @@ -194,9 +195,9 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); rdi->qp_dev->qp_table = - kmalloc(rdi->qp_dev->qp_table_size * - sizeof(*rdi->qp_dev->qp_table), - GFP_KERNEL); + kmalloc_node(rdi->qp_dev->qp_table_size * + sizeof(*rdi->qp_dev->qp_table), + GFP_KERNEL, rdi->dparms.node); if (!rdi->qp_dev->qp_table) goto no_qp_table; @@ -542,8 +543,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, (init_attr->cap.max_send_wr + 1) * sz, gfp, PAGE_KERNEL); else - swq = vmalloc( - (init_attr->cap.max_send_wr + 1) * sz); + swq = vmalloc_node( + (init_attr->cap.max_send_wr + 1) * sz, + rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -558,7 +560,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, gfp); + qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node); if (!qp) goto bail_swq; @@ -592,9 +594,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.size * sz, gfp, PAGE_KERNEL); else - qp->r_rq.wq = vmalloc( + qp->r_rq.wq = vmalloc_node( sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); + qp->r_rq.size * sz, + rdi->dparms.node); if (!qp->r_rq.wq) goto bail_driver_priv; } -- cgit v0.10.2 From f1badc716349cc2ac6e55ad50dcff598ef97bad5 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:15:02 -0800 Subject: IB/rdmavt: Clean up distinction between port number and index IB core uses 1 relative indexing for ports. All of our data structures use 0 based indexing. Add an inline function that we can use whenever we need to validate a legal value and try to convert a port number to a port index at the entrance into rdmavt. 
Try to follow the policy that when we are talking about a port from IB core point of view we refer to it as a port number. When port is an index into our arrays refer to it as a port index. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 5c720d35..2feae47 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -47,12 +47,13 @@ #include #include "mad.h" +#include "vt.h" /** * rvt_process_mad - process an incoming MAD packet * @ibdev: the infiniband device this packet came in on * @mad_flags: MAD flags - * @port: the port number this packet came in on + * @port_num: the port number this packet came in on, 1 based from ib core * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet * @in_mad: the incoming MAD @@ -67,7 +68,7 @@ * * This is called by the ib_mad module. */ -int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, @@ -82,6 +83,9 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, *VT-DRIVER-API: ???? * */ + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; + return IB_MAD_RESULT_FAILURE; } diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index c89faf4..a9d6eec 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -50,7 +50,7 @@ #include -int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 471d9c5..2647dba 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -286,26 +286,31 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, return (map - qpt->map) * RVT_BITS_PER_PAGE + off; } -/* - * Allocate the next available QPN or - * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 
+/** + * alloc_qpn - Allocate the next available qpn or zero/one for QP type + * IB_QPT_SMI/IB_QPT_GSI + *@rdi: rvt device info structure + *@qpt: queue pair number table pointer + *@port_num: IB port number, 1 based, comes from core + * + * Return: The queue pair number */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp) + enum ib_qp_type type, u8 port_num, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port, + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, GFP_KERNEL); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; ret = type == IB_QPT_GSI; - n = 1 << (ret + 2 * (port - 1)); + n = 1 << (ret + 2 * (port_num - 1)); spin_lock(&qpt->lock); if (qpt->flags & n) ret = -EINVAL; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index d45206c..9f9cb9a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -120,14 +120,17 @@ static int rvt_modify_device(struct ib_device *device, /** * rvt_query_port: Passes the query port call to the driver * @ibdev: Verbs IB dev - * @port: port number + * @port_num: port number, 1 based from ib core * @props: structure to hold returned properties * * Returns 0 on success */ -static int rvt_query_port(struct ib_device *ibdev, u8 port, +static int rvt_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *props) { + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; + /* * VT-DRIVER-API: query_port_state() * driver returns pretty much everything in ib_port_attr @@ -138,13 +141,13 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port, /** * rvt_modify_port * @ibdev: Verbs IB dev - * @port: Port number + * @port_num: Port number, 1 based from ib core * @port_modify_mask: How to change the port * @props: Structure to fill in * * Returns 0 on success */ -static int rvt_modify_port(struct ib_device *ibdev, u8 port, +static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, int port_modify_mask, struct ib_port_modify *props) { /* @@ -160,18 +163,21 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port, * TBD: send_trap() and post_mad_send() need examined to see where they * fit in. 
*/ + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; + return -EOPNOTSUPP; } /** * rvt_query_pkey - Return a pkey from the table at a given index * @ibdev: Verbs IB dev - * @port: Port number + * @port_num: Port number, 1 based from ib core * @intex: Index into pkey table * * Returns 0 on failure pkey otherwise */ -static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, +static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey) { /* @@ -183,11 +189,11 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, struct rvt_dev_info *rdi = ib_to_rvt(ibdev); int port_index; - if (index >= rvt_get_npkeys(rdi)) + port_index = ibport_num_to_idx(ibdev, port_num); + if (port_index < 0) return -EINVAL; - port_index = port - 1; /* IB ports start at 1 our array at 0 */ - if ((port_index < 0) || (port_index >= rdi->dparms.nports)) + if (index >= rvt_get_npkeys(rdi)) return -EINVAL; *pkey = rvt_get_pkey(rdi, port_index, index); @@ -197,13 +203,13 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, /** * rvt_query_gid - Return a gid from the table * @ibdev: Verbs IB dev - * @port: Port number + * @port_num: Port number, 1 based from ib core * @index: = Index in table * @gid: Gid to return * * Returns 0 on success */ -static int rvt_query_gid(struct ib_device *ibdev, u8 port, +static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid) { /* @@ -211,6 +217,8 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port, * to craft the return value. This will work similar to how query_pkey() * is being done. */ + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; return -EOPNOTSUPP; } @@ -455,11 +463,11 @@ EXPORT_SYMBOL(rvt_unregister_device); * They persist until the driver goes away. */ int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum, u16 *pkey_table) + int port_index, u16 *pkey_table) { - rdi->ports[portnum] = port; - rdi->ports[portnum]->pkey_table = pkey_table; + rdi->ports[port_index] = port; + rdi->ports[port_index]->pkey_table = pkey_table; return 0; } diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index a5c36d3..e26f9e9 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -88,4 +88,16 @@ #define __rvt_pr_err(pdev, name, fmt, ...) 
\ dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) +static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num) +{ + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + int port_index; + + port_index = port_num - 1; /* IB ports start at 1 our arrays at 0 */ + if ((port_index < 0) || (port_index >= rdi->dparms.nports)) + return -EINVAL; + + return port_index; +} + #endif /* DEF_RDMAVT_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 52dfa9c..5d1c694 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -256,7 +256,7 @@ struct rvt_driver_provided { void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp); + enum ib_qp_type type, u8 port_num, gfp_t gfp); /** * Return 0 if modification is valid, -errno otherwise */ @@ -408,7 +408,7 @@ int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum, u16 *pkey_table); + int port_index, u16 *pkey_table); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v0.10.2 From 1f024992ef05d1eb9b3a0becd1611ecfa21854a6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:15:11 -0800 Subject: IB/rdmavt: Add query gid support. Addin query gid support. Rdmavt still relies on the driver to maintain the gid table. Rdmavt simply calls into the driver to retrive the guid for a particular port. Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 9f9cb9a..e017117 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -210,17 +210,28 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, * Returns 0 on success */ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, - int index, union ib_gid *gid) + int guid_index, union ib_gid *gid) { + struct rvt_dev_info *rdi; + struct rvt_ibport *rvp; + int port_index; + /* * Driver is responsible for updating the guid table. Which will be used * to craft the return value. This will work similar to how query_pkey() * is being done. 
*/ - if (ibport_num_to_idx(ibdev, port_num) < 0) + port_index = ibport_num_to_idx(ibdev, port_num); + if (port_index < 0) return -EINVAL; - return -EOPNOTSUPP; + rdi = ib_to_rvt(ibdev); + rvp = rdi->ports[port_index]; + + gid->global.subnet_prefix = rvp->gid_prefix; + + return rdi->driver_f.get_guid_be(rdi, rvp, guid_index, + &gid->global.interface_id); } struct rvt_ucontext { diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 5d1c694..dabf4d5 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -248,6 +248,8 @@ struct rvt_driver_provided { u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); int (*mtu_to_path_mtu)(u32 mtu); + int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, + int guid_index, __be64 *guid); /*--------------------*/ /* Optional functions */ -- cgit v0.10.2 From 61a650c14d728354b2d493bed3f1b0531f033dac Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 3 Feb 2016 14:15:20 -0800 Subject: IB/rdmavt: Add support for query_port, modify_port and get_port_immutable rvt_query_port calls into the driver through a call back function query_port_state to populate the rest of ib_port_attr elements. rvt_modify_port calls into the driver if needed through a call back function shut_down_port() Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e017117..2ccf610 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -128,14 +128,27 @@ static int rvt_modify_device(struct ib_device *device, static int rvt_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *props) { - if (ibport_num_to_idx(ibdev, port_num) < 0) + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct rvt_ibport *rvp; + int port_index = ibport_num_to_idx(ibdev, port_num); + + if (port_index < 0) return -EINVAL; - /* - * VT-DRIVER-API: query_port_state() - * driver returns pretty much everything in ib_port_attr - */ - return -EOPNOTSUPP; + rvp = rdi->ports[port_index]; + memset(props, 0, sizeof(*props)); + props->sm_lid = rvp->sm_lid; + props->sm_sl = rvp->sm_sl; + props->port_cap_flags = rvp->port_cap_flags; + props->max_msg_sz = 0x80000000; + props->pkey_tbl_len = rvt_get_npkeys(rdi); + props->bad_pkey_cntr = rvp->pkey_violations; + props->qkey_viol_cntr = rvp->qkey_violations; + props->subnet_timeout = rvp->subnet_timeout; + props->init_type_reply = 0; + + /* Populate the remaining ib_port_attr elements */ + return rdi->driver_f.query_port_state(rdi, port_num, props); } /** @@ -150,23 +163,26 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, int port_modify_mask, struct ib_port_modify *props) { - /* - * VT-DRIVER-API: set_link_state() - * driver will set the link state using the IB enumeration - * - * VT-DRIVER-API: clear_qkey_violations() - * clears driver private qkey counter - * - * VT-DRIVER-API: get_lid() - * driver needs to return the LID - * - * TBD: send_trap() and post_mad_send() need examined to see where they - * fit in. 
- */ - if (ibport_num_to_idx(ibdev, port_num) < 0) + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct rvt_ibport *rvp; + int ret = 0; + int port_index = ibport_num_to_idx(ibdev, port_num); + + if (port_index < 0) return -EINVAL; - return -EOPNOTSUPP; + rvp = rdi->ports[port_index]; + rvp->port_cap_flags |= props->set_port_cap_mask; + rvp->port_cap_flags &= ~props->clr_port_cap_mask; + + if (props->set_port_cap_mask || props->clr_port_cap_mask) + rdi->driver_f.cap_mask_chg(rdi, port_num); + if (port_modify_mask & IB_PORT_SHUTDOWN) + ret = rdi->driver_f.shut_down_port(rdi, port_num); + if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) + rvp->qkey_violations = 0; + + return ret; } /** @@ -273,7 +289,24 @@ static int rvt_dealloc_ucontext(struct ib_ucontext *context) static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { - return -EOPNOTSUPP; + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct ib_port_attr attr; + int err, port_index; + + port_index = ibport_num_to_idx(ibdev, port_num); + if (port_index < 0) + return -EINVAL; + + err = rvt_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = rdi->dparms.core_cap_flags; + immutable->max_mad_size = rdi->dparms.max_mad_size; + + return 0; } /* diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dabf4d5..4242fea 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -200,6 +200,8 @@ struct rvt_driver_params { int psn_mask; int psn_shift; int psn_modify_mask; + u32 core_cap_flags; + u32 max_mad_size; }; /* Protection domain */ @@ -250,6 +252,10 @@ struct rvt_driver_provided { int (*mtu_to_path_mtu)(u32 mtu); int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid); + int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, + struct ib_port_attr *props); + int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); /*--------------------*/ /* Optional functions */ -- cgit v0.10.2 From b7b3cf44647cab47f6b7d8f10bfdc92cafbb952f Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 3 Feb 2016 14:15:28 -0800 Subject: IB/rdmavt: Properly pass gfp to hw driver function alloc_qpn must use GFP and the hardware drivers should use it as well. Reviewed-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 2647dba..e8d0da8 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -303,8 +303,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, - GFP_KERNEL); + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; -- cgit v0.10.2 From fd0bf5bedfbd898bddc9ea8e646b4cb3779ec9ab Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 3 Feb 2016 14:20:02 -0800 Subject: IB/qib: Remove srq functionality srq functionality is now in rdmavt. Remove it from the qib driver. 
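Dropping these entry points from qib is safe because rvt_register_device() only keeps a verb the driver explicitly supplies and otherwise wires in the rdmavt implementation (the CHECK_DRIVER_OVERRIDE() calls earlier in this series). Conceptually, and only as a sketch rather than the actual macro body, the fallback amounts to:

/*
 * Illustrative sketch, assuming the rdmavt-internal srq.h declarations
 * are in scope; wire_up_srq_verbs() is an invented helper name.
 */
static void wire_up_srq_verbs(struct rvt_dev_info *rdi)
{
	if (!rdi->ibdev.create_srq)
		rdi->ibdev.create_srq = rvt_create_srq;
	if (!rdi->ibdev.post_srq_recv)
		rdi->ibdev.post_srq_recv = rvt_post_srq_recv;
}

so a driver that no longer provides its own SRQ verbs automatically gets the common ones.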
Reviewed-by: Dennis Dalessandro Reviewed-by: Harish Chegondi Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index 8d5e36b..79ebd79 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o ib_qib-y := qib_diag.o qib_driver.o qib_eeprom.o \ qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \ qib_mad.o qib_pcie.o qib_pio_copy.o \ - qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ + qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o \ qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \ qib_user_pages.o qib_user_sdma.o qib_iba7220.o \ qib_sd7220.o qib_iba7322.o qib_verbs.o diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c deleted file mode 100644 index dff8808..0000000 --- a/drivers/infiniband/hw/qib/qib_srq.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include - -#include "qib_verbs.h" - -/** - * qib_post_srq_receive - post a receive on a shared receive queue - * @ibsrq: the SRQ to post the receive on - * @wr: the list of work requests to post - * @bad_wr: A pointer to the first WR to cause a problem is put here - * - * This may be called from interrupt context. 
- */ -int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct rvt_rwq *wq; - unsigned long flags; - int ret; - - for (; wr; wr = wr->next) { - struct rvt_rwqe *wqe; - u32 next; - int i; - - if ((unsigned) wr->num_sge > srq->rq.max_sge) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&srq->rq.lock, flags); - wq = srq->rq.wq; - next = wq->head + 1; - if (next >= srq->rq.size) - next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&srq->rq.lock, flags); - *bad_wr = wr; - ret = -ENOMEM; - goto bail; - } - - wqe = get_rwqe_ptr(&srq->rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&srq->rq.lock, flags); - } - ret = 0; - -bail: - return ret; -} - -/** - * qib_create_srq - create a shared receive queue - * @ibpd: the protection domain of the SRQ to create - * @srq_init_attr: the attributes of the SRQ - * @udata: data from libibverbs when creating a user SRQ - */ -struct ib_srq *qib_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) -{ - struct qib_ibdev *dev = to_idev(ibpd->device); - struct rvt_srq *srq; - u32 sz; - struct ib_srq *ret; - - if (srq_init_attr->srq_type != IB_SRQT_BASIC) { - ret = ERR_PTR(-ENOSYS); - goto done; - } - - if (srq_init_attr->attr.max_sge == 0 || - srq_init_attr->attr.max_sge > ib_qib_max_srq_sges || - srq_init_attr->attr.max_wr == 0 || - srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) { - ret = ERR_PTR(-EINVAL); - goto done; - } - - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - - /* - * Need to use vmalloc() if we want to support large #s of entries. - */ - srq->rq.size = srq_init_attr->attr.max_wr + 1; - srq->rq.max_sge = srq_init_attr->attr.max_sge; - sz = sizeof(struct ib_sge) * srq->rq.max_sge + - sizeof(struct rvt_rwqe); - srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz); - if (!srq->rq.wq) { - ret = ERR_PTR(-ENOMEM); - goto bail_srq; - } - - /* - * Return the address of the RWQ as the offset to mmap. - * See qib_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - int err; - u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; - - srq->ip = - rvt_create_mmap_info(&dev->rdi, s, ibpd->uobject->context, - srq->rq.wq); - if (!srq->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wq; - } - - err = ib_copy_to_udata(udata, &srq->ip->offset, - sizeof(srq->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else - srq->ip = NULL; - - /* - * ib_create_srq() will initialize srq->ibsrq. 
- */ - spin_lock_init(&srq->rq.lock); - srq->rq.wq->head = 0; - srq->rq.wq->tail = 0; - srq->limit = srq_init_attr->attr.srq_limit; - - spin_lock(&dev->n_srqs_lock); - if (dev->n_srqs_allocated == ib_qib_max_srqs) { - spin_unlock(&dev->n_srqs_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_srqs_allocated++; - spin_unlock(&dev->n_srqs_lock); - - if (srq->ip) { - spin_lock_irq(&dev->rdi.pending_lock); - list_add(&srq->ip->pending_mmaps, &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - - ret = &srq->ibsrq; - goto done; - -bail_ip: - kfree(srq->ip); -bail_wq: - vfree(srq->rq.wq); -bail_srq: - kfree(srq); -done: - return ret; -} - -/** - * qib_modify_srq - modify a shared receive queue - * @ibsrq: the SRQ to modify - * @attr: the new attributes of the SRQ - * @attr_mask: indicates which attributes to modify - * @udata: user data for libibverbs.so - */ -int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, - struct ib_udata *udata) -{ - struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct rvt_rwq *wq; - int ret = 0; - - if (attr_mask & IB_SRQ_MAX_WR) { - struct rvt_rwq *owq; - struct rvt_rwqe *p; - u32 sz, size, n, head, tail; - - /* Check that the requested sizes are below the limits. */ - if ((attr->max_wr > ib_qib_max_srq_wrs) || - ((attr_mask & IB_SRQ_LIMIT) ? - attr->srq_limit : srq->limit) > attr->max_wr) { - ret = -EINVAL; - goto bail; - } - - sz = sizeof(struct rvt_rwqe) + - srq->rq.max_sge * sizeof(struct ib_sge); - size = attr->max_wr + 1; - wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz); - if (!wq) { - ret = -ENOMEM; - goto bail; - } - - /* Check that we can write the offset to mmap. */ - if (udata && udata->inlen >= sizeof(__u64)) { - __u64 offset_addr; - __u64 offset = 0; - - ret = ib_copy_from_udata(&offset_addr, udata, - sizeof(offset_addr)); - if (ret) - goto bail_free; - udata->outbuf = - (void __user *) (unsigned long) offset_addr; - ret = ib_copy_to_udata(udata, &offset, - sizeof(offset)); - if (ret) - goto bail_free; - } - - spin_lock_irq(&srq->rq.lock); - /* - * validate head and tail pointer values and compute - * the number of remaining WQEs. - */ - owq = srq->rq.wq; - head = owq->head; - tail = owq->tail; - if (head >= srq->rq.size || tail >= srq->rq.size) { - ret = -EINVAL; - goto bail_unlock; - } - n = head; - if (n < tail) - n += srq->rq.size - tail; - else - n -= tail; - if (size <= n) { - ret = -EINVAL; - goto bail_unlock; - } - n = 0; - p = wq->wq; - while (tail != head) { - struct rvt_rwqe *wqe; - int i; - - wqe = get_rwqe_ptr(&srq->rq, tail); - p->wr_id = wqe->wr_id; - p->num_sge = wqe->num_sge; - for (i = 0; i < wqe->num_sge; i++) - p->sg_list[i] = wqe->sg_list[i]; - n++; - p = (struct rvt_rwqe *)((char *)p + sz); - if (++tail >= srq->rq.size) - tail = 0; - } - srq->rq.wq = wq; - srq->rq.size = size; - wq->head = n; - wq->tail = 0; - if (attr_mask & IB_SRQ_LIMIT) - srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); - - vfree(owq); - - if (srq->ip) { - struct rvt_mmap_info *ip = srq->ip; - struct qib_ibdev *dev = to_idev(srq->ibsrq.device); - u32 s = sizeof(struct rvt_rwq) + size * sz; - - rvt_update_mmap_info(&dev->rdi, ip, s, wq); - - /* - * Return the offset to mmap. - * See qib_mmap() for details. - */ - if (udata && udata->inlen >= sizeof(__u64)) { - ret = ib_copy_to_udata(udata, &ip->offset, - sizeof(ip->offset)); - if (ret) - goto bail; - } - - /* - * Put user mapping info onto the pending list - * unless it already is on the list. 
- */ - spin_lock_irq(&dev->rdi.pending_lock); - if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, - &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - } else if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irq(&srq->rq.lock); - if (attr->srq_limit >= srq->rq.size) - ret = -EINVAL; - else - srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); - } - goto bail; - -bail_unlock: - spin_unlock_irq(&srq->rq.lock); -bail_free: - vfree(wq); -bail: - return ret; -} - -int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) -{ - struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - - attr->max_wr = srq->rq.size - 1; - attr->max_sge = srq->rq.max_sge; - attr->srq_limit = srq->limit; - return 0; -} - -/** - * qib_destroy_srq - destroy a shared receive queue - * @ibsrq: the SRQ to destroy - */ -int qib_destroy_srq(struct ib_srq *ibsrq) -{ - struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct qib_ibdev *dev = to_idev(ibsrq->device); - - spin_lock(&dev->n_srqs_lock); - dev->n_srqs_allocated--; - spin_unlock(&dev->n_srqs_lock); - if (srq->ip) - kref_put(&srq->ip->ref, rvt_release_mmap_info); - else - vfree(srq->rq.wq); - kfree(srq); - - return 0; -} diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 8b97ca1..3785a52 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1656,7 +1656,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* Only need to initialize non-zero fields. */ spin_lock_init(&dev->n_qps_lock); - spin_lock_init(&dev->n_srqs_lock); init_timer(&dev->mem_timer); dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; @@ -1754,17 +1753,12 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->destroy_ah = NULL; ibdev->modify_ah = NULL; ibdev->query_ah = NULL; - ibdev->create_srq = qib_create_srq; - ibdev->modify_srq = qib_modify_srq; - ibdev->query_srq = qib_query_srq; - ibdev->destroy_srq = qib_destroy_srq; ibdev->create_qp = NULL; ibdev->modify_qp = qib_modify_qp; ibdev->query_qp = NULL; ibdev->destroy_qp = qib_destroy_qp; ibdev->post_send = NULL; ibdev->post_recv = NULL; - ibdev->post_srq_recv = qib_post_srq_receive; ibdev->create_cq = NULL; ibdev->destroy_cq = NULL; ibdev->resize_cq = NULL; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 34f7784..a7e3c71 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -270,8 +270,6 @@ struct qib_ibdev { u32 n_qps_allocated; /* number of QPs allocated for device */ spinlock_t n_qps_lock; - u32 n_srqs_allocated; /* number of SRQs allocated for device */ - spinlock_t n_srqs_lock; #ifdef CONFIG_DEBUG_FS /* per HCA debugfs */ struct dentry *qib_ibdev_dbg; @@ -428,21 +426,6 @@ int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); -int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); - -struct ib_srq *qib_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); - -int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, - struct ib_udata *udata); - -int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); - -int qib_destroy_srq(struct ib_srq *ibsrq); - void mr_rcu_callback(struct rcu_head *list); static inline void 
qib_put_ss(struct rvt_sge_state *ss) -- cgit v0.10.2 From 9e804b1f2d9790ba69d7e27a44500bd4448215dc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:20:10 -0800 Subject: IB/qib: Clean up register_ib_device Remove some of the unnecessary code from qib_register_ib_device. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 3785a52..0116f58 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1703,81 +1703,15 @@ int qib_register_ib_device(struct qib_devdata *dd) strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX); ibdev->owner = THIS_MODULE; ibdev->node_guid = ppd->guid; - ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION; - ibdev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - ibdev->node_type = RDMA_NODE_IB_CA; ibdev->phys_port_cnt = dd->num_pports; - ibdev->num_comp_vectors = 1; ibdev->dma_device = &dd->pcidev->dev; - ibdev->query_device = NULL; ibdev->modify_device = qib_modify_device; ibdev->query_port = qib_query_port; ibdev->modify_port = qib_modify_port; - ibdev->query_pkey = NULL; ibdev->query_gid = qib_query_gid; - ibdev->alloc_ucontext = NULL; - ibdev->dealloc_ucontext = NULL; - ibdev->alloc_pd = NULL; - ibdev->dealloc_pd = NULL; - ibdev->create_ah = NULL; - ibdev->destroy_ah = NULL; - ibdev->modify_ah = NULL; - ibdev->query_ah = NULL; - ibdev->create_qp = NULL; ibdev->modify_qp = qib_modify_qp; - ibdev->query_qp = NULL; ibdev->destroy_qp = qib_destroy_qp; - ibdev->post_send = NULL; - ibdev->post_recv = NULL; - ibdev->create_cq = NULL; - ibdev->destroy_cq = NULL; - ibdev->resize_cq = NULL; - ibdev->poll_cq = NULL; - ibdev->req_notify_cq = NULL; - ibdev->get_dma_mr = NULL; - ibdev->reg_user_mr = NULL; - ibdev->dereg_mr = NULL; - ibdev->alloc_mr = NULL; - ibdev->map_mr_sg = NULL; - ibdev->alloc_fmr = NULL; - ibdev->map_phys_fmr = NULL; - ibdev->unmap_fmr = NULL; - ibdev->dealloc_fmr = NULL; - ibdev->attach_mcast = NULL; - ibdev->detach_mcast = NULL; ibdev->process_mad = qib_process_mad; - ibdev->mmap = NULL; - ibdev->dma_ops = NULL; ibdev->get_port_immutable = qib_port_immutable; snprintf(ibdev->node_desc, 
sizeof(ibdev->node_desc), @@ -1799,8 +1733,6 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.do_send = qib_do_send; dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send; - dd->verbs_dev.rdi.flags = 0; - dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size; dd->verbs_dev.rdi.dparms.qpn_start = 1; -- cgit v0.10.2 From 1cefc2cd20f1d2b4e84bba14d5a5bf5d44936dc6 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 3 Feb 2016 14:20:19 -0800 Subject: IB/qib: Remove qib_lookup_qpn and use rvt_lookup_qpn instead Add calls to rcu_read_lock()/rcu_read_unlock() as rvt_lookup_qpn callers must hold the rcu_read_lock before calling and keep the lock until the returned qp is no longer in use. Remove lookaside qp and some qp refcount atomics in the sdma send code that is redundant with the s_dma_busy refcount, which will also stall the state processing to the reset state. Change the qpn hash function to hash_32 which is hash function used in rvt_lookup_qpn. qpn_hash function would be eliminated in later patches. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index ccadece..751c9d7 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -230,9 +230,6 @@ struct qib_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - /* lookaside fields */ - struct rvt_qp *lookaside_qp; - u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index a11de8e..707d789 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -322,6 +322,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; struct qib_other_headers *ohdr = NULL; struct qib_ibport *ibp = &ppd->ibport_data; + struct qib_devdata *dd = ppd->dd; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; struct rvt_qp *qp = NULL; u32 tlen = qib_hdrget_length_in_bytes(rhf_addr); u16 lid = be16_to_cpu(hdr->lrh[1]); @@ -366,9 +368,12 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, if (qp_num != QIB_MULTICAST_QPN) { int ruc_res; - qp = qib_lookup_qpn(ibp, qp_num); - if (!qp) + rcu_read_lock(); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); + if (!qp) { + rcu_read_unlock(); goto drop; + } /* * Handle only RC QPs - for other QP types drop error @@ -435,12 +440,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, unlock: spin_unlock(&qp->r_lock); - /* - * Notify qib_destroy_qp() if it is waiting - * for us to finish. - */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rcu_read_unlock(); } /* Unicast QP */ } /* Valid packet with TIDErr */ @@ -565,15 +565,6 @@ move_along: updegr = 0; } } - /* - * Notify qib_destroy_qp() if it is waiting - * for lookaside_qp to finish. 
- */ - if (rcd->lookaside_qp) { - if (atomic_dec_and_test(&rcd->lookaside_qp->refcount)) - wake_up(&rcd->lookaside_qp->wait); - rcd->lookaside_qp = NULL; - } rcd->head = l; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index ce9002f..8508e69 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -34,7 +34,6 @@ #include #include -#include #include #ifdef CONFIG_DEBUG_FS #include @@ -221,8 +220,7 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) { - return jhash_1word(qpn, dev->qp_rnd) & - (dev->rdi.qp_dev->qp_table_size - 1); + return hash_32(qpn, dev->rdi.qp_dev->qp_table_bits); } @@ -293,7 +291,8 @@ static void remove_qp(struct qib_ibdev *dev, struct rvt_qp *qp) spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); if (removed) { synchronize_rcu(); - atomic_dec(&qp->refcount); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); } } @@ -320,41 +319,6 @@ unsigned qib_free_all_qps(struct rvt_dev_info *rdi) return qp_inuse; } -/** - * qib_lookup_qpn - return the QP with the given QPN - * @qpt: the QP table - * @qpn: the QP number to look up - * - * The caller is responsible for decrementing the QP reference count - * when done. - */ -struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) -{ - struct rvt_qp *qp = NULL; - - rcu_read_lock(); - if (unlikely(qpn <= 1)) { - if (qpn == 0) - qp = rcu_dereference(ibp->rvp.qp[0]); - else - qp = rcu_dereference(ibp->rvp.qp[1]); - if (qp) - atomic_inc(&qp->refcount); - } else { - struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; - unsigned n = qpn_hash(dev, qpn); - - for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; - qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) { - atomic_inc(&qp->refcount); - break; - } - } - rcu_read_unlock(); - return qp; -} - void notify_qp_reset(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 4961a54..e1d1a15 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -358,6 +358,9 @@ err: static void qib_ruc_loopback(struct rvt_qp *sqp) { struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = ppd->dd; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; struct rvt_qp *qp; struct rvt_swqe *wqe; struct rvt_sge *sge; @@ -369,11 +372,14 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) int release; int ret; + rcu_read_lock(); /* * Note that we check the responder QP state after * checking the requester's state. 
*/ - qp = qib_lookup_qpn(ibp, sqp->remote_qpn); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn); + if (!qp) + goto done; spin_lock_irqsave(&sqp->s_lock, flags); @@ -639,8 +645,7 @@ clr_busy: unlock: spin_unlock_irqrestore(&sqp->s_lock, flags); done: - if (qp && atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rcu_read_unlock(); } /** diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index f0ea002..98bccb5 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -50,7 +50,9 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) { struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); - struct qib_pportdata *ppd; + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = ppd->dd; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; struct rvt_qp *qp; struct ib_ah_attr *ah_attr; unsigned long flags; @@ -60,9 +62,11 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) u32 length; enum ib_qp_type sqptype, dqptype; - qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); + rcu_read_lock(); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->rvp.n_pkt_drops++; + rcu_read_unlock(); return; } @@ -223,8 +227,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) bail_unlock: spin_unlock_irqrestore(&qp->r_lock, flags); drop: - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rcu_read_unlock(); } /** diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 0116f58..c9cc3ae 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -367,6 +367,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) struct qib_pportdata *ppd = rcd->ppd; struct qib_ibport *ibp = &ppd->ibport_data; struct qib_ib_header *hdr = rhdr; + struct qib_devdata *dd = ppd->dd; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; struct qib_other_headers *ohdr; struct rvt_qp *qp; u32 qp_num; @@ -429,25 +431,15 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (atomic_dec_return(&mcast->refcount) <= 1) wake_up(&mcast->wait); } else { - if (rcd->lookaside_qp) { - if (rcd->lookaside_qpn != qp_num) { - if (atomic_dec_and_test( - &rcd->lookaside_qp->refcount)) - wake_up( - &rcd->lookaside_qp->wait); - rcd->lookaside_qp = NULL; - } + rcu_read_lock(); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); + if (!qp) { + rcu_read_unlock(); + goto drop; } - if (!rcd->lookaside_qp) { - qp = qib_lookup_qpn(ibp, qp_num); - if (!qp) - goto drop; - rcd->lookaside_qp = qp; - rcd->lookaside_qpn = qp_num; - } else - qp = rcd->lookaside_qp; this_cpu_inc(ibp->pmastats->n_unicast_rcv); qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); + rcu_read_unlock(); } return; @@ -747,8 +739,6 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) qp = tx->qp; dev = to_idev(qp->ibqp.device); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); if (tx->mr) { rvt_put_mr(tx->mr); tx->mr = NULL; @@ -929,7 +919,6 @@ static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr, control = dd->f_setpbc_control(ppd, plen, qp->s_srate, be16_to_cpu(hdr->lrh[0]) >> 12); tx->qp = qp; - atomic_inc(&qp->refcount); tx->wqe = qp->s_wqe; tx->mr = qp->s_rdma_mr; if (qp->s_rdma_mr) diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a7e3c71..6ad924f 100644 --- 
a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -359,8 +359,6 @@ int qib_get_counters(struct qib_pportdata *ppd, __be32 qib_compute_aeth(struct rvt_qp *qp); -struct rvt_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn); - int qib_destroy_qp(struct ib_qp *ibqp); int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err); -- cgit v0.10.2 From 70696ea75b0b9d2cb220a09ea19d72a49f501d8e Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 3 Feb 2016 14:20:27 -0800 Subject: IB/qib: Remove modify queue pair code Modify queue pair functionality in rdmavt will be used instead. Remove ancillary functions which are being used by modify QP code. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 751c9d7..5ba073e 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1539,4 +1539,14 @@ struct qib_hwerror_msgs { void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t lmsg); + +void stop_send_queue(struct rvt_qp *qp); +void quiesce_qp(struct rvt_qp *qp); +void flush_qp_waiters(struct rvt_qp *qp); +int mtu_to_path_mtu(u32 mtu); +u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); +void notify_error_qp(struct rvt_qp *qp); +int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); + #endif /* _QIB_KERNEL_H */ diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index 1d87ec0..1d6e63e 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -747,7 +747,6 @@ struct qib_tid_session_member { #define QIB_AETH_CREDIT_INVAL 0x1F #define QIB_PSN_MASK 0xFFFFFF #define QIB_MSN_MASK 0xFFFFFF -#define QIB_QPN_MASK 0xFFFFFF #define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK #define QIB_MULTICAST_QPN 0xFFFFFF diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 707d789..67ee643 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -364,7 +364,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, psn = be32_to_cpu(ohdr->bth[2]); /* Get the destination QP number. */ - qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; + qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; if (qp_num != QIB_MULTICAST_QPN) { int ruc_res; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 8508e69..cee4aa3 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -209,46 +209,11 @@ bail: return ret; } -static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} - static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) { return hash_32(qpn, dev->rdi.qp_dev->qp_table_bits); } - -/* - * Put the QP into the hash table. - * The hash table holds a reference to the QP. 
- */ -static void insert_qp(struct qib_ibdev *dev, struct rvt_qp *qp) -{ - struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - unsigned long flags; - unsigned n = qpn_hash(dev, qp->ibqp.qp_num); - - atomic_inc(&qp->refcount); - spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); - - if (qp->ibqp.qp_num == 0) - rcu_assign_pointer(ibp->rvp.qp[0], qp); - else if (qp->ibqp.qp_num == 1) - rcu_assign_pointer(ibp->rvp.qp[1], qp); - else { - qp->next = dev->rdi.qp_dev->qp_table[n]; - rcu_assign_pointer(dev->rdi.qp_dev->qp_table[n], qp); - } - - spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); -} - /* * Remove the QP from the table so it can't be found asynchronously by * the receive interrupt routine. @@ -326,82 +291,10 @@ void notify_qp_reset(struct rvt_qp *qp) atomic_set(&priv->s_dma_busy, 0); } -static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) -{ - unsigned n; - - if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - qib_put_ss(&qp->s_rdma_read_sge); - - qib_put_ss(&qp->r_sge); - - if (clr_sends) { - while (qp->s_last != qp->s_head) { - struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - unsigned i; - - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec( - &ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - } - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - } - - if (qp->ibqp.qp_type != IB_QPT_RC) - return; - - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { - struct rvt_ack_entry *e = &qp->s_ack_queue[n]; - - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - } -} - -/** - * qib_error_qp - put a QP into the error state - * @qp: the QP to put into the error state - * @err: the receive completion error to signal if a RWQE is active - * - * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. - * The QP r_lock and s_lock should be held and interrupts disabled. - * If we are already in error state, just return. - */ -int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err) +void notify_error_qp(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); - struct ib_wc wc; - int ret = 0; - - if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) - goto bail; - - qp->state = IB_QPS_ERR; - - if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); - } - - if (qp->s_flags & RVT_S_ANY_WAIT_SEND) - qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; spin_lock(&dev->rdi.pending_lock); if (!list_empty(&priv->iowait) && !(qp->s_flags & RVT_S_BUSY)) { @@ -421,351 +314,60 @@ int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err) priv->s_tx = NULL; } } - - /* Schedule the sending tasklet to drain the send work queue. 
*/ - if (qp->s_last != qp->s_head) - qib_schedule_send(qp); - - clear_mr_refs(qp, 0); - - memset(&wc, 0, sizeof(wc)); - wc.qp = &qp->ibqp; - wc.opcode = IB_WC_RECV; - - if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { - wc.wr_id = qp->r_wr_id; - wc.status = err; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - } - wc.status = IB_WC_WR_FLUSH_ERR; - - if (qp->r_rq.wq) { - struct rvt_rwq *wq; - u32 head; - u32 tail; - - spin_lock(&qp->r_rq.lock); - - /* sanity check pointers before trusting them */ - wq = qp->r_rq.wq; - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - while (tail != head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; - if (++tail >= qp->r_rq.size) - tail = 0; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - } - wq->tail = tail; - - spin_unlock(&qp->r_rq.lock); - } else if (qp->ibqp.event_handler) - ret = 1; - -bail: - return ret; } -/** - * qib_modify_qp - modify the attributes of a queue pair - * @ibqp: the queue pair who's attributes we're modifying - * @attr: the new attributes - * @attr_mask: the mask of attributes to modify - * @udata: user data for libibverbs.so - * - * Returns 0 on success, otherwise returns an errno. - */ -int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) +static int mtu_to_enum(u32 mtu) { - struct qib_ibdev *dev = to_idev(ibqp->device); - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct qib_qp_priv *priv = qp->priv; - enum ib_qp_state cur_state, new_state; - struct ib_event ev; - int lastwqe = 0; - int mig = 0; - int ret; - u32 pmtu = 0; /* for gcc warning only */ - - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_lock); - - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : qp->state; - new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) - goto inval; - - if (attr_mask & IB_QP_AV) { - if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; - if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) - goto inval; - } - - if (attr_mask & IB_QP_ALT_PATH) { - if (attr->alt_ah_attr.dlid >= - be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; - if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) - goto inval; - if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev))) - goto inval; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev))) - goto inval; - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - if (attr->min_rnr_timer > 31) - goto inval; - - if (attr_mask & IB_QP_PORT) - if (qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI || - attr->port_num == 0 || - attr->port_num > ibqp->device->phys_port_cnt) - goto inval; + int enum_mtu; - if (attr_mask & IB_QP_DEST_QPN) - if (attr->dest_qp_num > QIB_QPN_MASK) - goto inval; - - if (attr_mask & IB_QP_RETRY_CNT) - if (attr->retry_cnt > 7) - goto inval; - - if (attr_mask & IB_QP_RNR_RETRY) - if (attr->rnr_retry > 7) - goto inval; - - /* - * Don't allow invalid path_mtu values. OK to set greater - * than the active mtu (or even the max_cap, if we have tuned - * that to a small mtu. We'll set qp->path_mtu - * to the lesser of requested attribute mtu and active, - * for packetizing messages. - * Note that the QP port has to be set in INIT and MTU in RTR. 
- */ - if (attr_mask & IB_QP_PATH_MTU) { - struct qib_devdata *dd = dd_from_dev(dev); - int mtu, pidx = qp->port_num - 1; - - mtu = ib_mtu_enum_to_int(attr->path_mtu); - if (mtu == -1) - goto inval; - if (mtu > dd->pport[pidx].ibmtu) { - switch (dd->pport[pidx].ibmtu) { - case 4096: - pmtu = IB_MTU_4096; - break; - case 2048: - pmtu = IB_MTU_2048; - break; - case 1024: - pmtu = IB_MTU_1024; - break; - case 512: - pmtu = IB_MTU_512; - break; - case 256: - pmtu = IB_MTU_256; - break; - default: - pmtu = IB_MTU_2048; - } - } else - pmtu = attr->path_mtu; - } - - if (attr_mask & IB_QP_PATH_MIG_STATE) { - if (attr->path_mig_state == IB_MIG_REARM) { - if (qp->s_mig_state == IB_MIG_ARMED) - goto inval; - if (new_state != IB_QPS_RTS) - goto inval; - } else if (attr->path_mig_state == IB_MIG_MIGRATED) { - if (qp->s_mig_state == IB_MIG_REARM) - goto inval; - if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) - goto inval; - if (qp->s_mig_state == IB_MIG_ARMED) - mig = 1; - } else - goto inval; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC) - goto inval; - - switch (new_state) { - case IB_QPS_RESET: - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - spin_lock(&dev->rdi.pending_lock); - if (!list_empty(&priv->iowait)) - list_del_init(&priv->iowait); - spin_unlock(&dev->rdi.pending_lock); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - /* Stop the sending work queue and retry timer */ - cancel_work_sync(&priv->s_work); - del_timer_sync(&qp->s_timer); - wait_event(priv->wait_dma, - !atomic_read(&priv->s_dma_busy)); - if (priv->s_tx) { - qib_put_txreq(priv->s_tx); - priv->s_tx = NULL; - } - remove_qp(dev, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_lock); - clear_mr_refs(qp, 1); - rvt_reset_qp(&dev->rdi, qp, ibqp->qp_type); - } + switch (mtu) { + case 4096: + enum_mtu = IB_MTU_4096; break; - - case IB_QPS_RTR: - /* Allow event to retrigger if QP set to RTR more than once */ - qp->r_flags &= ~RVT_R_COMM_EST; - qp->state = new_state; + case 2048: + enum_mtu = IB_MTU_2048; break; - - case IB_QPS_SQD: - qp->s_draining = qp->s_last != qp->s_cur; - qp->state = new_state; + case 1024: + enum_mtu = IB_MTU_1024; break; - - case IB_QPS_SQE: - if (qp->ibqp.qp_type == IB_QPT_RC) - goto inval; - qp->state = new_state; + case 512: + enum_mtu = IB_MTU_512; break; - - case IB_QPS_ERR: - lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + case 256: + enum_mtu = IB_MTU_256; break; - default: - qp->state = new_state; - break; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - qp->s_pkey_index = attr->pkey_index; - - if (attr_mask & IB_QP_PORT) - qp->port_num = attr->port_num; - - if (attr_mask & IB_QP_DEST_QPN) - qp->remote_qpn = attr->dest_qp_num; - - if (attr_mask & IB_QP_SQ_PSN) { - qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK; - qp->s_psn = qp->s_next_psn; - qp->s_sending_psn = qp->s_next_psn; - qp->s_last_psn = qp->s_next_psn - 1; - qp->s_sending_hpsn = qp->s_last_psn; - } - - if (attr_mask & IB_QP_RQ_PSN) - qp->r_psn = attr->rq_psn & QIB_PSN_MASK; - - if (attr_mask & IB_QP_ACCESS_FLAGS) - qp->qp_access_flags = attr->qp_access_flags; - - if (attr_mask & IB_QP_AV) { - qp->remote_ah_attr = attr->ah_attr; - qp->s_srate = attr->ah_attr.static_rate; - } - - if (attr_mask & IB_QP_ALT_PATH) { - qp->alt_ah_attr = attr->alt_ah_attr; - qp->s_alt_pkey_index = attr->alt_pkey_index; - } - - if (attr_mask & IB_QP_PATH_MIG_STATE) { - 
qp->s_mig_state = attr->path_mig_state; - if (mig) { - qp->remote_ah_attr = qp->alt_ah_attr; - qp->port_num = qp->alt_ah_attr.port_num; - qp->s_pkey_index = qp->s_alt_pkey_index; - } - } - - if (attr_mask & IB_QP_PATH_MTU) { - qp->path_mtu = pmtu; - qp->pmtu = ib_mtu_enum_to_int(pmtu); - } - - if (attr_mask & IB_QP_RETRY_CNT) { - qp->s_retry_cnt = attr->retry_cnt; - qp->s_retry = attr->retry_cnt; - } - - if (attr_mask & IB_QP_RNR_RETRY) { - qp->s_rnr_retry_cnt = attr->rnr_retry; - qp->s_rnr_retry = attr->rnr_retry; - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - qp->r_min_rnr_timer = attr->min_rnr_timer; - - if (attr_mask & IB_QP_TIMEOUT) { - qp->timeout = attr->timeout; - qp->timeout_jiffies = - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL); + enum_mtu = IB_MTU_2048; } + return enum_mtu; +} - if (attr_mask & IB_QP_QKEY) - qp->qkey = attr->qkey; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - qp->r_max_rd_atomic = attr->max_dest_rd_atomic; - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) - qp->s_max_rd_atomic = attr->max_rd_atomic; - - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) - insert_qp(dev, qp); +int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr) +{ + int mtu, pmtu, pidx = qp->port_num - 1; + struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata, + verbs_dev); + mtu = ib_mtu_enum_to_int(attr->path_mtu); + if (mtu == -1) + return -EINVAL; - if (lastwqe) { - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); - } - if (mig) { - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_PATH_MIG; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); - } - ret = 0; - goto bail; + if (mtu > dd->pport[pidx].ibmtu) + pmtu = mtu_to_enum(dd->pport[pidx].ibmtu); + else + pmtu = attr->path_mtu; + return pmtu; +} -inval: - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - ret = -EINVAL; +int mtu_to_path_mtu(u32 mtu) +{ + return mtu_to_enum(mtu); +} -bail: - return ret; +u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) +{ + return ib_mtu_enum_to_int(pmtu); } /** @@ -857,6 +459,35 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) kfree(priv); } +void stop_send_queue(struct rvt_qp *qp) +{ + struct qib_qp_priv *priv = qp->priv; + + cancel_work_sync(&priv->s_work); +} + +void quiesce_qp(struct rvt_qp *qp) +{ + struct qib_qp_priv *priv = qp->priv; + + wait_event(priv->wait_dma, !atomic_read(&priv->s_dma_busy)); + if (priv->s_tx) { + qib_put_txreq(priv->s_tx); + priv->s_tx = NULL; + } +} + +void flush_qp_waiters(struct rvt_qp *qp) +{ + struct qib_qp_priv *priv = qp->priv; + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + + spin_lock(&dev->rdi.pending_lock); + if (!list_empty(&priv->iowait)) + list_del_init(&priv->iowait); + spin_unlock(&dev->rdi.pending_lock); +} + /** * qib_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy @@ -891,15 +522,13 @@ int qib_destroy_qp(struct ib_qp *ibqp) } remove_qp(dev, qp); wait_event(qp->wait, !atomic_read(&qp->refcount)); - clear_mr_refs(qp, 1); + rvt_clear_mr_refs(qp, 1); } else spin_unlock_irq(&qp->s_lock); /* all user's cleaned up, mark it available */ - free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; 
- spin_unlock(&dev->n_qps_lock); + rvt_free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); + rvt_dec_qp_cnt(&dev->rdi); if (qp->ip) kref_put(&qp->ip->ref, rvt_release_mmap_info); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 78ae93e..044525d9 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -879,7 +879,7 @@ static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait) qp->s_retry = qp->s_retry_cnt; } else if (qp->s_last == qp->s_acked) { qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); - qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); return; } else /* XXX need to handle delayed completion */ return; @@ -1324,7 +1324,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, class_b: if (qp->s_last == qp->s_acked) { qib_send_complete(qp, wqe, status); - qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); } break; @@ -1599,7 +1599,7 @@ ack_len_err: ack_err: if (qp->s_last == qp->s_acked) { qib_send_complete(qp, wqe, status); - qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); } ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1825,7 +1825,7 @@ void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err) int lastwqe; spin_lock_irqsave(&qp->s_lock, flags); - lastwqe = qib_error_qp(qp, err); + lastwqe = rvt_error_qp(qp, err); spin_unlock_irqrestore(&qp->s_lock, flags); if (lastwqe) { @@ -2027,7 +2027,7 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; qib_copy_sge(&qp->r_sge, data, tlen, 1); - qib_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); qp->r_msn++; if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index e1d1a15..56668cb 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -174,7 +174,7 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only) } /* Make sure entry is read after head index is read. */ smp_rmb(); - wqe = get_rwqe_ptr(rq, tail); + wqe = rvt_get_rwqe_ptr(rq, tail); /* * Even though we update the tail index in memory, the verbs * consumer is not supposed to post more entries until a @@ -551,7 +551,7 @@ again: sqp->s_len -= len; } if (release) - qib_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto send_comp; @@ -626,7 +626,7 @@ serr: spin_lock_irqsave(&sqp->s_lock, flags); qib_send_complete(sqp, wqe, send_status); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index ae65e9f..891873b 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -673,7 +673,7 @@ unmap: if (qp->ibqp.qp_type == IB_QPT_RC) { /* XXX what about error sending RDMA read responses? 
*/ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) - qib_error_qp(qp, IB_WC_GENERAL_ERR); + rvt_error_qp(qp, IB_WC_GENERAL_ERR); } else if (qp->s_wqe) qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); spin_unlock(&qp->s_lock); diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index b97892f..1b2fc69 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -282,7 +282,7 @@ inv: set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else - qib_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -401,7 +401,7 @@ send_last: goto rewind; wc.opcode = IB_WC_RECV; qib_copy_sge(&qp->r_sge, data, tlen, 0); - qib_put_ss(&qp->s_rdma_read_sge); + rvt_put_ss(&qp->s_rdma_read_sge); last_imm: wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -485,7 +485,7 @@ rdma_last_imm: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - qib_put_ss(&qp->s_rdma_read_sge); + rvt_put_ss(&qp->s_rdma_read_sge); else { ret = qib_get_rwqe(qp, 1); if (ret < 0) @@ -496,7 +496,7 @@ rdma_last_imm: wc.byte_len = qp->r_len; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; qib_copy_sge(&qp->r_sge, data, tlen, 1); - qib_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); goto last_imm; case OP(RDMA_WRITE_LAST): @@ -512,7 +512,7 @@ rdma_last: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; qib_copy_sge(&qp->r_sge, data, tlen, 1); - qib_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); break; default: diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 98bccb5..fe49172 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -206,7 +206,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } length -= len; } - qib_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = qp->r_wr_id; @@ -452,7 +452,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */ } qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK; + src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; /* * Get the number of bytes the message was padded by @@ -565,7 +565,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } else qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); - qib_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index c9cc3ae..187f150 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -411,7 +411,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) #endif /* Get the destination QP number. */ - qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; + qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; if (qp_num == QIB_MULTICAST_QPN) { struct rvt_mcast *mcast; struct rvt_mcast_qp *p; @@ -1644,7 +1644,6 @@ int qib_register_ib_device(struct qib_devdata *dd) init_ibport(ppd + i); /* Only need to initialize non-zero fields. 
*/ - spin_lock_init(&dev->n_qps_lock); init_timer(&dev->mem_timer); dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; @@ -1698,7 +1697,6 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->query_port = qib_query_port; ibdev->modify_port = qib_modify_port; ibdev->query_gid = qib_query_gid; - ibdev->modify_qp = qib_modify_qp; ibdev->destroy_qp = qib_destroy_qp; ibdev->process_mad = qib_process_mad; ibdev->get_port_immutable = qib_port_immutable; @@ -1721,7 +1719,15 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; dd->verbs_dev.rdi.driver_f.do_send = qib_do_send; dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send; - + dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp; + dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue; + dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; + dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; + dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; + dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp; + dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; + + dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC; dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size; dd->verbs_dev.rdi.dparms.qpn_start = 1; @@ -1730,6 +1736,8 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.dparms.qpn_inc = 1; dd->verbs_dev.rdi.dparms.qos_shift = 1; dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK; + dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT; + dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK; dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd); dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 6ad924f..0487d62 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -55,6 +55,7 @@ struct qib_verbs_txreq; #define QIB_MAX_RDMA_ATOMIC 16 #define QIB_GUIDS_PER_PORT 5 +#define QIB_PSN_SHIFT 8 /* * Increment this value if any changes that break userspace ABI @@ -200,18 +201,6 @@ struct qib_qp_priv { #define QIB_PSN_CREDIT 16 -/* - * Since struct rvt_rwqe is not a fixed size, we can't simply index into - * struct rvt_rwq.wq. This function does the array index computation. 
- */ -static inline struct rvt_rwqe *get_rwqe_ptr(struct rvt_rq *rq, unsigned n) -{ - return (struct rvt_rwqe *) - ((char *) rq->wq->wq + - (sizeof(struct rvt_rwqe) + - rq->max_sge * sizeof(struct ib_sge)) * n); -} - struct qib_opcode_stats { u64 n_packets; /* number of packets */ u64 n_bytes; /* total number of bytes */ @@ -268,8 +257,6 @@ struct qib_ibdev { u32 n_piowait; u32 n_txwait; - u32 n_qps_allocated; /* number of QPs allocated for device */ - spinlock_t n_qps_lock; #ifdef CONFIG_DEBUG_FS /* per HCA debugfs */ struct dentry *qib_ibdev_dbg; @@ -361,10 +348,6 @@ __be32 qib_compute_aeth(struct rvt_qp *qp); int qib_destroy_qp(struct ib_qp *ibqp); -int qib_error_qp(struct rvt_qp *qp, enum ib_wc_status err); - -int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata); /* * Functions provided by qib driver for rdmavt to use */ @@ -426,15 +409,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, void mr_rcu_callback(struct rcu_head *list); -static inline void qib_put_ss(struct rvt_sge_state *ss) -{ - while (ss->num_sge) { - rvt_put_mr(ss->sge.mr); - if (--ss->num_sge) - ss->sge = *ss->sg_list++; - } -} - int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); void qib_migrate_qp(struct rvt_qp *qp); -- cgit v0.10.2 From 8e4c066634aa35e7da08981439f4f1b6693fd9fb Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 3 Feb 2016 14:20:35 -0800 Subject: IB/qib: Remove destroy queue pair code Destroy QP functionality in rdmavt will be used instead. Remove the remove_qp function being called exclusively by destroy qp code. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index cee4aa3..45bed5f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -209,58 +209,6 @@ bail: return ret; } -static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) -{ - return hash_32(qpn, dev->rdi.qp_dev->qp_table_bits); -} - -/* - * Remove the QP from the table so it can't be found asynchronously by - * the receive interrupt routine. 
- */ -static void remove_qp(struct qib_ibdev *dev, struct rvt_qp *qp) -{ - struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - unsigned n = qpn_hash(dev, qp->ibqp.qp_num); - unsigned long flags; - int removed = 1; - spinlock_t *qpt_lock_ptr; /* Pointer to make checkpatch happy */ - - spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); - - qpt_lock_ptr = &dev->rdi.qp_dev->qpt_lock; - if (rcu_dereference_protected(ibp->rvp.qp[0], - lockdep_is_held(qpt_lock_ptr)) == qp) { - RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); - } else if (rcu_dereference_protected(ibp->rvp.qp[1], - lockdep_is_held(&dev->rdi.qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); - } else { - struct rvt_qp *q; - struct rvt_qp __rcu **qpp; - - removed = 0; - qpp = &dev->rdi.qp_dev->qp_table[n]; - for (; (q = rcu_dereference_protected(*qpp, - lockdep_is_held(qpt_lock_ptr))) != NULL; - qpp = &q->next) - if (q == qp) { - RCU_INIT_POINTER(*qpp, - rcu_dereference_protected(qp->next, - lockdep_is_held(qpt_lock_ptr))); - removed = 1; - break; - } - } - - spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); - if (removed) { - synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } -} - /** * qib_free_all_qps - check for QPs still in use */ @@ -489,59 +437,6 @@ void flush_qp_waiters(struct rvt_qp *qp) } /** - * qib_destroy_qp - destroy a queue pair - * @ibqp: the queue pair to destroy - * - * Returns 0 on success. - * - * Note that this can be called while the QP is actively sending or - * receiving! - */ -int qib_destroy_qp(struct ib_qp *ibqp) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct qib_ibdev *dev = to_idev(ibqp->device); - struct qib_qp_priv *priv = qp->priv; - - /* Make sure HW and driver activity is stopped. 
*/ - spin_lock_irq(&qp->s_lock); - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - spin_lock(&dev->rdi.pending_lock); - if (!list_empty(&priv->iowait)) - list_del_init(&priv->iowait); - spin_unlock(&dev->rdi.pending_lock); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock_irq(&qp->s_lock); - cancel_work_sync(&priv->s_work); - del_timer_sync(&qp->s_timer); - wait_event(priv->wait_dma, !atomic_read(&priv->s_dma_busy)); - if (priv->s_tx) { - qib_put_txreq(priv->s_tx); - priv->s_tx = NULL; - } - remove_qp(dev, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - rvt_clear_mr_refs(qp, 1); - } else - spin_unlock_irq(&qp->s_lock); - - /* all user's cleaned up, mark it available */ - rvt_free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); - rvt_dec_qp_cnt(&dev->rdi); - - if (qp->ip) - kref_put(&qp->ip->ref, rvt_release_mmap_info); - else - vfree(qp->r_rq.wq); - vfree(qp->s_wq); - kfree(priv->s_hdr); - kfree(priv); - kfree(qp); - return 0; -} - -/** * qib_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush * @aeth: the Acknowledge Extended Transport Header diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 187f150..e534355 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1697,7 +1697,6 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->query_port = qib_query_port; ibdev->modify_port = qib_modify_port; ibdev->query_gid = qib_query_gid; - ibdev->destroy_qp = qib_destroy_qp; ibdev->process_mad = qib_process_mad; ibdev->get_port_immutable = qib_port_immutable; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 0487d62..e12bb9d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -346,8 +346,6 @@ int qib_get_counters(struct qib_pportdata *ppd, __be32 qib_compute_aeth(struct rvt_qp *qp); -int qib_destroy_qp(struct ib_qp *ibqp); - /* * Functions provided by qib driver for rdmavt to use */ -- cgit v0.10.2 From 2366754632d3e52a97bf607a22bcca592a46f3f9 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:20:44 -0800 Subject: IB/qib: Support query gid in rdmavt Query gid is in rdmavt, but still relies on the driver to maintain the guid table. Add the necessary driver call back and remove the existing verb handler. 
Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index e534355..a3a13a5 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1421,28 +1421,20 @@ static int qib_modify_port(struct ib_device *ibdev, u8 port, return 0; } -static int qib_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) +static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, + int guid_index, __be64 *guid) { - struct qib_devdata *dd = dd_from_ibdev(ibdev); - int ret = 0; - - if (!port || port > dd->num_pports) - ret = -EINVAL; - else { - struct qib_ibport *ibp = to_iport(ibdev, port); - struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); - gid->global.subnet_prefix = ibp->rvp.gid_prefix; - if (index == 0) - gid->global.interface_id = ppd->guid; - else if (index < QIB_GUIDS_PER_PORT) - gid->global.interface_id = ibp->guids[index - 1]; - else - ret = -EINVAL; - } + if (guid_index == 0) + *guid = ppd->guid; + else if (guid_index < QIB_GUIDS_PER_PORT) + *guid = ibp->guids[guid_index - 1]; + else + return -EINVAL; - return ret; + return 0; } int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) @@ -1696,7 +1688,6 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->modify_device = qib_modify_device; ibdev->query_port = qib_query_port; ibdev->modify_port = qib_modify_port; - ibdev->query_gid = qib_query_gid; ibdev->process_mad = qib_process_mad; ibdev->get_port_immutable = qib_port_immutable; @@ -1727,6 +1718,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC; + dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be; dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size; dd->verbs_dev.rdi.dparms.qpn_start = 1; -- cgit v0.10.2 From 530a5d8ebd0d80c52aa7cbdb2127ff45b0db97ae Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 3 Feb 2016 14:20:52 -0800 Subject: IB/qib: Remove modify_port and port_immutable functions Delete code from query_port which has been moved into rvt_query_port Create a call back function to shut down a port which may be called from rvt_modify_port Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 1273537..73ca2c2 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -206,8 +206,11 @@ static void qib_bad_mkey(struct qib_ibport *ibp, struct ib_smp *smp) /* * Send a Port Capability Mask Changed trap (ch. 14.3.11). 
*/ -void qib_cap_mask_chg(struct qib_ibport *ibp) +void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) { + struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = dd_from_dev(ibdev); + struct qib_ibport *ibp = &dd->pport[port_num - 1].ibport_data; struct ib_mad_notice_attr data; data.generic_type = IB_NOTICE_TYPE_INFO; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index a3a13a5..4158362 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1311,33 +1311,24 @@ full: } } -static int qib_query_port(struct ib_device *ibdev, u8 port, +static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num, struct ib_port_attr *props) { - struct qib_devdata *dd = dd_from_ibdev(ibdev); - struct qib_ibport *ibp = to_iport(ibdev, port); - struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = dd_from_dev(ibdev); + struct qib_pportdata *ppd = &dd->pport[port_num - 1]; enum ib_mtu mtu; u16 lid = ppd->lid; - memset(props, 0, sizeof(*props)); props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); props->lmc = ppd->lmc; - props->sm_lid = ibp->rvp.sm_lid; - props->sm_sl = ibp->rvp.sm_sl; props->state = dd->f_iblink_state(ppd->lastibcstat); props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat); - props->port_cap_flags = ibp->rvp.port_cap_flags; props->gid_tbl_len = QIB_GUIDS_PER_PORT; - props->max_msg_sz = 0x80000000; - props->pkey_tbl_len = qib_get_npkeys(dd); - props->bad_pkey_cntr = ibp->rvp.pkey_violations; - props->qkey_viol_cntr = ibp->rvp.qkey_violations; props->active_width = ppd->link_width_active; /* See rate_show() */ props->active_speed = ppd->link_speed_active; props->max_vl_num = qib_num_vls(ppd->vls_supported); - props->init_type_reply = 0; props->max_mtu = qib_ibmtu ? 
qib_ibmtu : IB_MTU_4096; switch (ppd->ibmtu) { @@ -1360,7 +1351,6 @@ static int qib_query_port(struct ib_device *ibdev, u8 port, mtu = IB_MTU_2048; } props->active_mtu = mtu; - props->subnet_timeout = ibp->rvp.subnet_timeout; return 0; } @@ -1404,20 +1394,14 @@ bail: return ret; } -static int qib_modify_port(struct ib_device *ibdev, u8 port, - int port_modify_mask, struct ib_port_modify *props) +static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num) { - struct qib_ibport *ibp = to_iport(ibdev, port); - struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); + struct qib_devdata *dd = dd_from_dev(ibdev); + struct qib_pportdata *ppd = &dd->pport[port_num - 1]; + + qib_set_linkstate(ppd, QIB_IB_LINKDOWN); - ibp->rvp.port_cap_flags |= props->set_port_cap_mask; - ibp->rvp.port_cap_flags &= ~props->clr_port_cap_mask; - if (props->set_port_cap_mask || props->clr_port_cap_mask) - qib_cap_mask_chg(ibp); - if (port_modify_mask & IB_PORT_SHUTDOWN) - qib_set_linkstate(ppd, QIB_IB_LINKDOWN); - if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) - ibp->rvp.qkey_violations = 0; return 0; } @@ -1553,24 +1537,6 @@ static void init_ibport(struct qib_pportdata *ppd) RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } -static int qib_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - err = qib_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; - immutable->max_mad_size = IB_MGMT_MAD_SIZE; - - return 0; -} - /** * qib_fill_device_attr - Fill in rvt dev info device attributes. 
* @dd: the device data structure @@ -1686,10 +1652,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->phys_port_cnt = dd->num_pports; ibdev->dma_device = &dd->pcidev->dev; ibdev->modify_device = qib_modify_device; - ibdev->query_port = qib_query_port; - ibdev->modify_port = qib_modify_port; ibdev->process_mad = qib_process_mad; - ibdev->get_port_immutable = qib_port_immutable; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); @@ -1716,6 +1679,9 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; + dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port; + dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port; + dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg; dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC; dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be; @@ -1732,6 +1698,9 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd); dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; + dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB; + dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE; + snprintf(dd->verbs_dev.rdi.dparms.cq_name, sizeof(dd->verbs_dev.rdi.dparms.cq_name), "qib_cq%d", dd->unit); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index e12bb9d..cd73a97 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -317,7 +317,7 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); -void qib_cap_mask_chg(struct qib_ibport *ibp); +void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, -- cgit v0.10.2 From ec3f2c12a1e6bea48fd58f2dfa97d7373263b39a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:41:33 -0800 Subject: staging/rdma/hfi1: Begin to use rdmavt for verbs This patch begins to make use of rdmavt by registering with it and providing access to the header files. This is just the beginning of rdmavt support in hfi1. 
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/staging/rdma/hfi1/Kconfig index bd0249b..846c240c 100644 --- a/drivers/staging/rdma/hfi1/Kconfig +++ b/drivers/staging/rdma/hfi1/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_HFI1 tristate "Intel OPA Gen1 support" - depends on X86_64 + depends on X86_64 && INFINIBAND_RDMAVT select MMU_NOTIFIER default m ---help--- diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 63d5d71..da2718f 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -9925,7 +9925,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) sdma_all_running(dd); /* Signal the IB layer that the port has went active */ - event.device = &dd->verbs_dev.ibdev; + event.device = &dd->verbs_dev.rdi.ibdev; event.element.port_num = ppd->port; event.event = IB_EVENT_PORT_ACTIVE; } diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index 0c88317..fbe9b15 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -860,7 +860,7 @@ static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, vl = sc4; } else { sl = (byte_two >> 4) & 0xf; - ibp = to_iport(&dd->verbs_dev.ibdev, 1); + ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1); sc5 = ibp->sl_to_sc[sl]; vl = sc_to_vlt(dd, sc5); if (vl != sc4) { diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 23d7e02..dbea286 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -65,8 +65,7 @@ #include #include #include -#include -#include +#include #include "chip_registers.h" #include "common.h" diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 27b31fc..b4076b2 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "hfi.h" #include "device.h" @@ -983,7 +984,7 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); - ib_dealloc_device(&dd->verbs_dev.ibdev); + ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); } /* @@ -1079,7 +1080,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) bail: if (!list_empty(&dd->list)) list_del_init(&dd->list); - ib_dealloc_device(&dd->verbs_dev.ibdev); + ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); return ERR_PTR(ret); } diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c index 426582b..1283f2d 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/staging/rdma/hfi1/intr.c @@ -98,7 +98,7 @@ static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev) */ if (!(dd->flags & HFI1_INITTED)) return; - event.device = &dd->verbs_dev.ibdev; + event.device = &dd->verbs_dev.rdi.ibdev; event.element.port_num = ppd->port; event.event = ev; ib_dispatch_event(&event); diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index aa84781..ed88a5a 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -1388,7 +1388,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); event.event = IB_EVENT_PKEY_CHANGE; - event.device = &dd->verbs_dev.ibdev; + event.device = &dd->verbs_dev.rdi.ibdev; 
event.element.port_num = port; ib_dispatch_event(&event); } @@ -4171,7 +4171,8 @@ int hfi1_create_agents(struct hfi1_ibdev *dev) for (p = 0; p < dd->num_pports; p++) { ibp = &dd->pport[p].ibport_data; - agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI, + agent = ib_register_mad_agent(&dev->rdi.ibdev, p + 1, + IB_QPT_SMI, NULL, 0, send_handler, NULL, NULL, 0); if (IS_ERR(agent)) { diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index ce03681..bb447b5 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -1570,7 +1570,7 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) return NULL; iter->dev = dev; - iter->specials = dev->ibdev.phys_port_cnt * 2; + iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; if (qp_iter_next(iter)) { kfree(iter); return NULL; @@ -1610,7 +1610,7 @@ int qp_iter_next(struct qp_iter *iter) struct hfi1_ibport *ibp; int pidx; - pidx = n % dev->ibdev.phys_port_cnt; + pidx = n % dev->rdi.ibdev.phys_port_cnt; ppd = &dd_from_dev(dev)->pport[pidx]; ibp = &ppd->ibport_data; diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index 1dd6727..d05b9f3 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -446,7 +446,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, ibdev.dev); + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); } @@ -455,7 +455,7 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, ibdev.dev); + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); struct hfi1_devdata *dd = dd_from_dev(dev); int ret; @@ -470,7 +470,7 @@ static ssize_t show_boardversion(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, ibdev.dev); + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); struct hfi1_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -482,7 +482,7 @@ static ssize_t show_nctxts(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, ibdev.dev); + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); struct hfi1_devdata *dd = dd_from_dev(dev); /* @@ -500,7 +500,7 @@ static ssize_t show_nfreectxts(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, ibdev.dev); + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); struct hfi1_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ @@ -511,7 +511,7 @@ static ssize_t show_serial(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, ibdev.dev); + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); struct hfi1_devdata *dd = dd_from_dev(dev); return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); @@ -523,7 +523,7 @@ static ssize_t store_chip_reset(struct device *device, size_t count) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, ibdev.dev); + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); struct hfi1_devdata *dd = dd_from_dev(dev); int ret; @@ -552,7 +552,7 @@ static ssize_t show_tempsense(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, ibdev.dev); + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); struct hfi1_devdata *dd = dd_from_dev(dev); struct hfi1_temp temp; int ret; @@ -700,7 +700,7 @@ bail: */ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) { - struct ib_device *dev = &dd->verbs_dev.ibdev; + struct ib_device *dev = &dd->verbs_dev.rdi.ibdev; int i, ret; for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) { diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 09b8d41..0692ec4 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1903,7 +1903,7 @@ static void verbs_txreq_kmem_cache_ctor(void *obj) int hfi1_register_ib_device(struct hfi1_devdata *dd) { struct hfi1_ibdev *dev = &dd->verbs_dev; - struct ib_device *ibdev = &dev->ibdev; + struct ib_device *ibdev = &dev->rdi.ibdev; struct hfi1_pportdata *ppd = dd->pport; unsigned i, lk_tab_size; int ret; @@ -2069,7 +2069,13 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) strncpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); - ret = ib_register_device(ibdev, hfi1_create_port_files); + /* + * Fill in rvt info object. 
+ */ + dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; + dd->verbs_dev.rdi.dparms.props.max_pd = hfi1_max_pds; + + ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) goto err_reg; @@ -2086,7 +2092,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) err_class: hfi1_free_agents(dev); err_agents: - ib_unregister_device(ibdev); + rvt_unregister_device(&dd->verbs_dev.rdi); err_reg: err_verbs_txreq: kmem_cache_destroy(dev->verbs_txreq_cache); @@ -2102,13 +2108,12 @@ bail: void hfi1_unregister_ib_device(struct hfi1_devdata *dd) { struct hfi1_ibdev *dev = &dd->verbs_dev; - struct ib_device *ibdev = &dev->ibdev; hfi1_verbs_unregister_sysfs(dd); hfi1_free_agents(dev); - ib_unregister_device(ibdev); + rvt_unregister_device(&dd->verbs_dev.rdi); if (!list_empty(&dev->txwait)) dd_dev_err(dd, "txwait list not empty!\n"); diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index a163fc2..58fb122 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -62,6 +62,7 @@ #include #include #include +#include struct hfi1_ctxtdata; struct hfi1_pportdata; @@ -749,7 +750,7 @@ struct hfi1_ibport { struct hfi1_qp_ibdev; struct hfi1_ibdev { - struct ib_device ibdev; + struct rvt_dev_info rdi; /* Must be first */ struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; @@ -843,7 +844,10 @@ static inline struct hfi1_qp *to_iqp(struct ib_qp *ibqp) static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev) { - return container_of(ibdev, struct hfi1_ibdev, ibdev); + struct rvt_dev_info *rdi; + + rdi = container_of(ibdev, struct rvt_dev_info, ibdev); + return container_of(rdi, struct hfi1_ibdev, rdi); } /* -- cgit v0.10.2 From 583be13cde4f90aeac5d3c7ba555fb4909553c16 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:41:39 -0800 Subject: staging/rdma/hfi1: Add basic rdmavt capability flags for hfi1 Most functionality is still being done in the driver, set flags so that rdmavt will let hfi1 continue to handle mr, qp, and cq init. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 0692ec4..5e21132 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -2074,6 +2074,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) */ dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; dd->verbs_dev.rdi.dparms.props.max_pd = hfi1_max_pds; + dd->verbs_dev.rdi.flags = (RVT_FLAG_MR_INIT_DRIVER | + RVT_FLAG_QP_INIT_DRIVER | + RVT_FLAG_CQ_INIT_DRIVER); ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) -- cgit v0.10.2 From f326674ae374e08b34d8b02b2357bad4ef07317c Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 19 Jan 2016 14:41:44 -0800 Subject: staging/rdma/hfi1: Consolidate dma ops for hfi1 Remove the dma.c file from hfi1 in favor of using that which is present in rdmavt. 
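These conversions all follow one shape: hfi1 fills in the rvt_dev_info embedded in its verbs device, declares which objects it still initializes itself, and lets rdmavt supply the common pieces (here, the DMA mapping ops that dma.c used to provide). A condensed sketch of the registration-time setup, assembled only from fields and calls visible in the diffs above and meant as an illustration rather than the applied patch:

int hfi1_register_ib_device(struct hfi1_devdata *dd)
{
	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;

	/* Driver callbacks and device limits now live in the rvt info object. */
	rdi->driver_f.port_callback = hfi1_create_port_files;
	rdi->dparms.props.max_pd = hfi1_max_pds;

	/* hfi1 keeps MR, QP and CQ initialization in the driver for now. */
	rdi->flags = RVT_FLAG_MR_INIT_DRIVER |
		     RVT_FLAG_QP_INIT_DRIVER |
		     RVT_FLAG_CQ_INIT_DRIVER;

	/* rdmavt performs the actual IB core registration. */
	return rvt_register_device(rdi);
}

/*
 * ib_device is embedded in rvt_dev_info, which is embedded in hfi1_ibdev,
 * so recovering the driver structure now takes two container_of() steps.
 */
static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
{
	struct rvt_dev_info *rdi =
		container_of(ibdev, struct rvt_dev_info, ibdev);

	return container_of(rdi, struct hfi1_ibdev, rdi);
}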
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index e63251b..69fb10f 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -7,7 +7,7 @@ # obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o -hfi1-y := chip.o cq.o device.o diag.o dma.o driver.o efivar.o eprom.o file_ops.o firmware.o \ +hfi1-y := chip.o cq.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ init.o intr.o keys.o mad.o mmap.o mr.o pcie.o pio.o pio_copy.o \ qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs_mcast.o verbs.o diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 5e21132..347409e 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -2063,7 +2063,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->detach_mcast = hfi1_multicast_detach; ibdev->process_mad = hfi1_process_mad; ibdev->mmap = hfi1_mmap; - ibdev->dma_ops = &hfi1_dma_mapping_ops; + ibdev->dma_ops = NULL; ibdev->get_port_immutable = port_immutable; strncpy(ibdev->node_desc, init_utsname()->nodename, diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 58fb122..a505545 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -1151,6 +1151,4 @@ extern unsigned int hfi1_max_srq_wrs; extern const u32 ib_hfi1_rnr_table[]; -extern struct ib_dma_mapping_ops hfi1_dma_mapping_ops; - #endif /* HFI1_VERBS_H */ -- cgit v0.10.2 From 4f87ccfca0c29bb0fb9d2e6037656e871714f9e7 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:41:50 -0800 Subject: staging/rdma/hfi1: Use rdmavt protection domain Remove protection domain from hfi1 and use rdmavt's version. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c index cb4e608..57a266f 100644 --- a/drivers/staging/rdma/hfi1/keys.c +++ b/drivers/staging/rdma/hfi1/keys.c @@ -176,7 +176,7 @@ out: * Check the IB SGE for validity and initialize our internal version * of it. 
*/ -int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd, +int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct rvt_pd *pd, struct hfi1_sge *isge, struct ib_sge *sge, int acc) { struct hfi1_mregion *mr; @@ -285,7 +285,7 @@ int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge, */ rcu_read_lock(); if (rkey == 0) { - struct hfi1_pd *pd = to_ipd(qp->ibqp.pd); + struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd); struct hfi1_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c index a3f8b88..3f1ef582 100644 --- a/drivers/staging/rdma/hfi1/mr.c +++ b/drivers/staging/rdma/hfi1/mr.c @@ -116,7 +116,7 @@ struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc) struct ib_mr *ret; int rval; - if (to_ipd(pd)->user) { + if (ibpd_to_rvtpd(pd)->user) { ret = ERR_PTR(-EPERM); goto bail; } diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 4a91975..d255f31 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -102,11 +102,11 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe) int i, j, ret; struct ib_wc wc; struct hfi1_lkey_table *rkt; - struct hfi1_pd *pd; + struct rvt_pd *pd; struct hfi1_sge_state *ss; rkt = &to_idev(qp->ibqp.device)->lk_table; - pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); + pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); ss = &qp->r_sge; ss->sg_list = qp->r_sg_list; qp->r_len = 0; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 347409e..ddfcfaf 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -368,7 +368,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) int j; int acc; struct hfi1_lkey_table *rkt; - struct hfi1_pd *pd; + struct rvt_pd *pd; struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; @@ -413,7 +413,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) return -ENOMEM; rkt = &to_idev(qp->ibqp.device)->lk_table; - pd = to_ipd(qp->ibqp.pd); + pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); @@ -1394,7 +1394,7 @@ static int query_device(struct ib_device *ibdev, props->max_mr = dev->lk_table.max; props->max_fmr = dev->lk_table.max; props->max_map_per_fmr = 32767; - props->max_pd = hfi1_max_pds; + props->max_pd = dev->rdi.dparms.props.max_pd; props->max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; props->max_qp_init_rd_atom = 255; /* props->max_res_rd_atom */ @@ -1592,61 +1592,6 @@ static int query_gid(struct ib_device *ibdev, u8 port, return ret; } -static struct ib_pd *alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - struct hfi1_ibdev *dev = to_idev(ibdev); - struct hfi1_pd *pd; - struct ib_pd *ret; - - /* - * This is actually totally arbitrary. Some correctness tests - * assume there's a maximum number of PDs that can be allocated. - * We don't actually have this limit, but we fail the test if - * we allow allocations of more than we report for this value. - */ - - pd = kmalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - spin_lock(&dev->n_pds_lock); - if (dev->n_pds_allocated == hfi1_max_pds) { - spin_unlock(&dev->n_pds_lock); - kfree(pd); - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - dev->n_pds_allocated++; - spin_unlock(&dev->n_pds_lock); - - /* ib_alloc_pd() will initialize pd->ibpd. 
*/ - pd->user = udata != NULL; - - ret = &pd->ibpd; - -bail: - return ret; -} - -static int dealloc_pd(struct ib_pd *ibpd) -{ - struct hfi1_pd *pd = to_ipd(ibpd); - struct hfi1_ibdev *dev = to_idev(ibpd->device); - - spin_lock(&dev->n_pds_lock); - dev->n_pds_allocated--; - spin_unlock(&dev->n_pds_lock); - - kfree(pd); - - return 0; -} - /* * convert ah port,sl to sc */ @@ -1920,7 +1865,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) init_ibport(ppd + i); /* Only need to initialize non-zero fields. */ - spin_lock_init(&dev->n_pds_lock); + spin_lock_init(&dev->n_ahs_lock); spin_lock_init(&dev->n_cqs_lock); spin_lock_init(&dev->n_qps_lock); @@ -2029,8 +1974,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->query_gid = query_gid; ibdev->alloc_ucontext = alloc_ucontext; ibdev->dealloc_ucontext = dealloc_ucontext; - ibdev->alloc_pd = alloc_pd; - ibdev->dealloc_pd = dealloc_pd; + ibdev->alloc_pd = NULL; + ibdev->dealloc_pd = NULL; ibdev->create_ah = create_ah; ibdev->destroy_ah = destroy_ah; ibdev->modify_ah = modify_ah; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index a505545..3079149 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -234,12 +234,6 @@ struct hfi1_mcast { int n_attached; }; -/* Protection domain */ -struct hfi1_pd { - struct ib_pd ibpd; - int user; /* non-zero if created from user space */ -}; - /* Address Handle */ struct hfi1_ah { struct ib_ah ibah; @@ -776,8 +770,6 @@ struct hfi1_ibdev { u64 n_kmem_wait; u64 n_send_schedule; - u32 n_pds_allocated; /* number of PDs allocated for device */ - spinlock_t n_pds_lock; u32 n_ahs_allocated; /* number of AHs allocated for device */ spinlock_t n_ahs_lock; u32 n_cqs_allocated; /* number of CQs allocated for device */ @@ -817,11 +809,6 @@ static inline struct hfi1_mr *to_imr(struct ib_mr *ibmr) return container_of(ibmr, struct hfi1_mr, ibmr); } -static inline struct hfi1_pd *to_ipd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct hfi1_pd, ibpd); -} - static inline struct hfi1_ah *to_iah(struct ib_ah *ibah) { return container_of(ibah, struct hfi1_ah, ibah); @@ -983,7 +970,7 @@ int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region); void hfi1_free_lkey(struct hfi1_mregion *mr); -int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd, +int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct rvt_pd *pd, struct hfi1_sge *isge, struct ib_sge *sge, int acc); int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge, -- cgit v0.10.2 From cd4ceee341ca9d8b176762d3ad783e46538589a7 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:41:55 -0800 Subject: staging/rdma/hfi1: Remove MR data structures from hfi1 Remove MR data structures from hfi1 and use the version in rdmavt Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c index 57a266f..ffaaa6f 100644 --- a/drivers/staging/rdma/hfi1/keys.c +++ b/drivers/staging/rdma/hfi1/keys.c @@ -63,21 +63,21 @@ * */ -int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region) +int hfi1_alloc_lkey(struct rvt_mregion *mr, int dma_region) { unsigned long flags; u32 r; u32 n; int ret = 0; struct hfi1_ibdev *dev = to_idev(mr->pd->device); - struct hfi1_lkey_table *rkt = &dev->lk_table; + struct rvt_lkey_table *rkt = &dev->lk_table; hfi1_get_mr(mr); spin_lock_irqsave(&rkt->lock, flags); /* special case for dma_mr lkey == 0 */ 
if (dma_region) { - struct hfi1_mregion *tmr; + struct rvt_mregion *tmr; tmr = rcu_access_pointer(dev->dma_mr); if (!tmr) { @@ -133,13 +133,13 @@ bail: * hfi1_free_lkey - free an lkey * @mr: mr to free from tables */ -void hfi1_free_lkey(struct hfi1_mregion *mr) +void hfi1_free_lkey(struct rvt_mregion *mr) { unsigned long flags; u32 lkey = mr->lkey; u32 r; struct hfi1_ibdev *dev = to_idev(mr->pd->device); - struct hfi1_lkey_table *rkt = &dev->lk_table; + struct rvt_lkey_table *rkt = &dev->lk_table; int freed = 0; spin_lock_irqsave(&rkt->lock, flags); @@ -176,10 +176,10 @@ out: * Check the IB SGE for validity and initialize our internal version * of it. */ -int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct rvt_pd *pd, +int hfi1_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct hfi1_sge *isge, struct ib_sge *sge, int acc) { - struct hfi1_mregion *mr; + struct rvt_mregion *mr; unsigned n, m; size_t off; @@ -231,15 +231,15 @@ int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct rvt_pd *pd, entries_spanned_by_off = off >> mr->page_shift; off -= (entries_spanned_by_off << mr->page_shift); - m = entries_spanned_by_off / HFI1_SEGSZ; - n = entries_spanned_by_off % HFI1_SEGSZ; + m = entries_spanned_by_off / RVT_SEGSZ; + n = entries_spanned_by_off % RVT_SEGSZ; } else { m = 0; n = 0; while (off >= mr->map[m]->segs[n].length) { off -= mr->map[m]->segs[n].length; n++; - if (n >= HFI1_SEGSZ) { + if (n >= RVT_SEGSZ) { m++; n = 0; } @@ -274,8 +274,8 @@ bail: int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc) { - struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct hfi1_mregion *mr; + struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; + struct rvt_mregion *mr; unsigned n, m; size_t off; @@ -328,15 +328,15 @@ int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge, entries_spanned_by_off = off >> mr->page_shift; off -= (entries_spanned_by_off << mr->page_shift); - m = entries_spanned_by_off / HFI1_SEGSZ; - n = entries_spanned_by_off % HFI1_SEGSZ; + m = entries_spanned_by_off / RVT_SEGSZ; + n = entries_spanned_by_off % RVT_SEGSZ; } else { m = 0; n = 0; while (off >= mr->map[m]->segs[n].length) { off -= mr->map[m]->segs[n].length; n++; - if (n >= HFI1_SEGSZ) { + if (n >= RVT_SEGSZ) { m++; n = 0; } diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c index 3f1ef582..7e14965 100644 --- a/drivers/staging/rdma/hfi1/mr.c +++ b/drivers/staging/rdma/hfi1/mr.c @@ -56,7 +56,7 @@ /* Fast memory region */ struct hfi1_fmr { struct ib_fmr ibfmr; - struct hfi1_mregion mr; /* must be last */ + struct rvt_mregion mr; /* must be last */ }; static inline struct hfi1_fmr *to_ifmr(struct ib_fmr *ibfmr) @@ -64,13 +64,13 @@ static inline struct hfi1_fmr *to_ifmr(struct ib_fmr *ibfmr) return container_of(ibfmr, struct hfi1_fmr, ibfmr); } -static int init_mregion(struct hfi1_mregion *mr, struct ib_pd *pd, +static int init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, int count) { int m, i = 0; int rval = 0; - m = (count + HFI1_SEGSZ - 1) / HFI1_SEGSZ; + m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; for (; i < m; i++) { mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); if (!mr->map[i]) @@ -91,7 +91,7 @@ bail: goto out; } -static void deinit_mregion(struct hfi1_mregion *mr) +static void deinit_mregion(struct rvt_mregion *mr) { int i = mr->mapsz; @@ -159,7 +159,7 @@ static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd) int m; /* Allocate struct plus pointers to first level page tables. 
*/ - m = (count + HFI1_SEGSZ - 1) / HFI1_SEGSZ; + m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL); if (!mr) goto bail; @@ -245,7 +245,7 @@ struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = umem->page_size; n++; - if (n == HFI1_SEGSZ) { + if (n == RVT_SEGSZ) { m++; n = 0; } @@ -333,7 +333,7 @@ struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags, int rval = -ENOMEM; /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + HFI1_SEGSZ - 1) / HFI1_SEGSZ; + m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ; fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL); if (!fmr) goto bail; @@ -385,7 +385,7 @@ int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int list_len, u64 iova) { struct hfi1_fmr *fmr = to_ifmr(ibfmr); - struct hfi1_lkey_table *rkt; + struct rvt_lkey_table *rkt; unsigned long flags; int m, n, i; u32 ps; @@ -410,7 +410,7 @@ int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, for (i = 0; i < list_len; i++) { fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; fmr->mr.map[m]->segs[n].length = ps; - if (++n == HFI1_SEGSZ) { + if (++n == RVT_SEGSZ) { m++; n = 0; } @@ -431,7 +431,7 @@ bail: int hfi1_unmap_fmr(struct list_head *fmr_list) { struct hfi1_fmr *fmr; - struct hfi1_lkey_table *rkt; + struct rvt_lkey_table *rkt; unsigned long flags; list_for_each_entry(fmr, fmr_list, ibfmr.list) { diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index d255f31..ea5efa4 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -101,7 +101,7 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe) { int i, j, ret; struct ib_wc wc; - struct hfi1_lkey_table *rkt; + struct rvt_lkey_table *rkt; struct rvt_pd *pd; struct hfi1_sge_state *ss; @@ -534,7 +534,7 @@ again: if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= HFI1_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 757017a..fbd0e41 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -381,7 +381,7 @@ struct verbs_txreq { struct sdma_txreq txreq; struct hfi1_qp *qp; struct hfi1_swqe *wqe; - struct hfi1_mregion *mr; + struct rvt_mregion *mr; struct hfi1_sge_state *ss; struct sdma_engine *sde; u16 hdr_dwords; diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 25e6053..970d42f 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -210,7 +210,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) if (--ssge.num_sge) *sge = *ssge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= HFI1_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index ddfcfaf..dc846d5 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -300,7 +300,7 @@ void hfi1_copy_sge( if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= HFI1_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; @@ -341,7 
+341,7 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release) if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= HFI1_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) break; sge->n = 0; @@ -367,7 +367,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) int i; int j; int acc; - struct hfi1_lkey_table *rkt; + struct rvt_lkey_table *rkt; struct rvt_pd *pd; struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_pportdata *ppd; @@ -725,7 +725,7 @@ void update_sge(struct hfi1_sge_state *ss, u32 length) if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= HFI1_SEGSZ) { + if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) return; sge->n = 0; @@ -1883,13 +1883,13 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) spin_lock_init(&dev->lk_table.lock); dev->lk_table.max = 1 << hfi1_lkey_table_size; /* ensure generation is at least 4 bits (keys.c) */ - if (hfi1_lkey_table_size > MAX_LKEY_TABLE_BITS) { + if (hfi1_lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) { dd_dev_warn(dd, "lkey bits %u too large, reduced to %u\n", - hfi1_lkey_table_size, MAX_LKEY_TABLE_BITS); - hfi1_lkey_table_size = MAX_LKEY_TABLE_BITS; + hfi1_lkey_table_size, RVT_MAX_LKEY_TABLE_BITS); + hfi1_lkey_table_size = RVT_MAX_LKEY_TABLE_BITS; } lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - dev->lk_table.table = (struct hfi1_mregion __rcu **) + dev->lk_table.table = (struct rvt_mregion __rcu **) vmalloc(lk_tab_size); if (dev->lk_table.table == NULL) { ret = -ENOMEM; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 3079149..14aa81c 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -285,44 +285,11 @@ struct hfi1_cq { }; /* - * A segment is a linear region of low physical memory. - * Used by the verbs layer. - */ -struct hfi1_seg { - void *vaddr; - size_t length; -}; - -/* The number of hfi1_segs that fit in a page. */ -#define HFI1_SEGSZ (PAGE_SIZE / sizeof(struct hfi1_seg)) - -struct hfi1_segarray { - struct hfi1_seg segs[HFI1_SEGSZ]; -}; - -struct hfi1_mregion { - struct ib_pd *pd; /* shares refcnt of ibmr.pd */ - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of hfi1_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - u8 page_shift; /* 0 - non unform/non powerof2 sizes */ - u8 lkey_published; /* in global table */ - struct completion comp; /* complete when refcount goes to zero */ - atomic_t refcount; - struct hfi1_segarray *map[0]; /* the segments */ -}; - -/* * These keep track of the copy progress within a memory region. * Used by the verbs layer. 
*/ struct hfi1_sge { - struct hfi1_mregion *mr; + struct rvt_mregion *mr; void *vaddr; /* kernel virtual address of segment */ u32 sge_length; /* length of the SGE */ u32 length; /* remaining length of the segment */ @@ -334,7 +301,7 @@ struct hfi1_sge { struct hfi1_mr { struct ib_mr ibmr; struct ib_umem *umem; - struct hfi1_mregion mr; /* must be last */ + struct rvt_mregion mr; /* must be last */ }; /* @@ -501,7 +468,7 @@ struct hfi1_qp { u32 s_flags; struct hfi1_swqe *s_wqe; struct hfi1_sge_state s_sge; /* current send request data */ - struct hfi1_mregion *s_rdma_mr; + struct rvt_mregion *s_rdma_mr; u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ @@ -655,16 +622,6 @@ static inline struct hfi1_rwqe *get_rwqe_ptr(struct hfi1_rq *rq, unsigned n) rq->max_sge * sizeof(struct ib_sge)) * n); } -#define MAX_LKEY_TABLE_BITS 23 - -struct hfi1_lkey_table { - spinlock_t lock; /* protect changes in this struct */ - u32 next; /* next unused index (speeds search) */ - u32 gen; /* generation count */ - u32 max; /* size of the table */ - struct hfi1_mregion __rcu **table; -}; - struct hfi1_opcode_stats { u64 n_packets; /* number of packets */ u64 n_bytes; /* total number of bytes */ @@ -748,12 +705,12 @@ struct hfi1_ibdev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; - struct hfi1_mregion __rcu *dma_mr; + struct rvt_mregion __rcu *dma_mr; struct hfi1_qp_ibdev *qp_dev; /* QP numbers are shared by all IB ports */ - struct hfi1_lkey_table lk_table; + struct rvt_lkey_table lk_table; /* protect wait lists */ seqlock_t iowait_lock; struct list_head txwait; /* list for wait verbs_txreq */ @@ -966,11 +923,11 @@ void hfi1_ud_rcv(struct hfi1_packet *packet); int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey); -int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region); +int hfi1_alloc_lkey(struct rvt_mregion *mr, int dma_region); -void hfi1_free_lkey(struct hfi1_mregion *mr); +void hfi1_free_lkey(struct rvt_mregion *mr); -int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct rvt_pd *pd, +int hfi1_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct hfi1_sge *isge, struct ib_sge *sge, int acc); int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge, @@ -1035,12 +992,12 @@ int hfi1_unmap_fmr(struct list_head *fmr_list); int hfi1_dealloc_fmr(struct ib_fmr *ibfmr); -static inline void hfi1_get_mr(struct hfi1_mregion *mr) +static inline void hfi1_get_mr(struct rvt_mregion *mr) { atomic_inc(&mr->refcount); } -static inline void hfi1_put_mr(struct hfi1_mregion *mr) +static inline void hfi1_put_mr(struct rvt_mregion *mr) { if (unlikely(atomic_dec_and_test(&mr->refcount))) complete(&mr->comp); -- cgit v0.10.2 From 4c6829c5c7d6186b76cf0817f9aa8e63831a6a27 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:00 -0800 Subject: staging/rdma/hfi1: Remove driver specific members from hfi1 qp type In preparation for moving the queue pair data structure to rdmavt the members of the driver specific queue pairs which are not common need to be pushed off to a private driver structure. This structure will be available in the queue pair once moved to rdmavt as a void pointer. This patch while not adding a lot of value in and of itself is a prerequisite to move the queue pair out of the drivers and into rdmavt. The driver specific, private queue pair data structure should condense as more of the send side code moves to rdmavt. 
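The private structure and the iowait-to-QP helper that carry this split appear in the verbs.h hunk below; they are reproduced here, lightly commented, only to summarize the pattern:

struct hfi1_qp_priv {
	struct ahg_ib_header *s_hdr;	/* next packet header to send */
	struct sdma_engine *s_sde;	/* current sdma engine */
	u8 s_sc;			/* SC[0..4] for the next packet */
	u8 r_adefered;			/* number of acks deferred */
	struct iowait s_iowait;
	struct hfi1_qp *owner;		/* back-pointer to the common QP */
};

/*
 * s_iowait now lives in the private struct, so code that only has the
 * embedded iowait recovers the QP through the owner back-pointer instead
 * of a container_of() on hfi1_qp itself.
 */
static inline struct hfi1_qp *iowait_to_qp(struct iowait *s_iowait)
{
	struct hfi1_qp_priv *priv;

	priv = container_of(s_iowait, struct hfi1_qp_priv, s_iowait);
	return priv->owner;
}

In this patch hfi1_qp itself gains a void *priv that points at this data; once the queue pair moves into rdmavt, the same opaque pointer is what the common QP will expose to the driver.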
Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index fbe9b15..15c616a 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -1619,7 +1619,8 @@ int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { - struct ahg_ib_header *ahdr = qp->s_hdr; + struct hfi1_qp_priv *priv = qp->priv; + struct ahg_ib_header *ahdr = priv->s_hdr; u32 hdrwords = qp->s_hdrwords; struct hfi1_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index b51a441..25d65f9 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -1528,6 +1528,7 @@ static void sc_piobufavail(struct send_context *sc) struct list_head *list; struct hfi1_qp *qps[PIO_WAIT_BATCH_SIZE]; struct hfi1_qp *qp; + struct hfi1_qp_priv *priv; unsigned long flags; unsigned i, n = 0; @@ -1547,8 +1548,9 @@ static void sc_piobufavail(struct send_context *sc) if (n == ARRAY_SIZE(qps)) goto full; wait = list_first_entry(list, struct iowait, list); - qp = container_of(wait, struct hfi1_qp, s_iowait); - list_del_init(&qp->s_iowait.list); + qp = iowait_to_qp(wait); + priv = qp->priv; + list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ qps[n++] = qp; } diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index bb447b5..cacef55 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -349,11 +349,12 @@ bail: */ static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type) { + struct hfi1_qp_priv *priv = qp->priv; qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; iowait_init( - &qp->s_iowait, + &priv->s_iowait, 1, hfi1_do_send, iowait_sleep, @@ -378,7 +379,7 @@ static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type) } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; - qp->r_adefered = 0; + priv->r_adefered = 0; qp->r_aflags = 0; qp->r_flags = 0; qp->s_head = 0; @@ -460,6 +461,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err) { struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); + struct hfi1_qp_priv *priv = qp->priv; struct ib_wc wc; int ret = 0; @@ -477,9 +479,9 @@ int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err) qp->s_flags &= ~HFI1_S_ANY_WAIT_SEND; write_seqlock(&dev->iowait_lock); - if (!list_empty(&qp->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) { + if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) { qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; - list_del_init(&qp->s_iowait.list); + list_del_init(&priv->s_iowait.list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } @@ -544,11 +546,13 @@ bail: static void flush_tx_list(struct hfi1_qp *qp) { - while (!list_empty(&qp->s_iowait.tx_head)) { + struct hfi1_qp_priv *priv = qp->priv; + + while (!list_empty(&priv->s_iowait.tx_head)) { struct sdma_txreq *tx; tx = list_first_entry( - &qp->s_iowait.tx_head, + &priv->s_iowait.tx_head, struct sdma_txreq, list); list_del_init(&tx->list); @@ -559,12 +563,13 @@ static void flush_tx_list(struct hfi1_qp *qp) static void flush_iowait(struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); unsigned long flags; 
write_seqlock_irqsave(&dev->iowait_lock, flags); - if (!list_empty(&qp->s_iowait.list)) { - list_del_init(&qp->s_iowait.list); + if (!list_empty(&priv->s_iowait.list)) { + list_del_init(&priv->s_iowait.list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } @@ -612,6 +617,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct hfi1_ibdev *dev = to_idev(ibqp->device); struct hfi1_qp *qp = to_iqp(ibqp); + struct hfi1_qp_priv *priv = qp->priv; enum ib_qp_state cur_state, new_state; struct ib_event ev; int lastwqe = 0; @@ -738,9 +744,9 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); /* Stop the sending work queue and retry timer */ - cancel_work_sync(&qp->s_iowait.iowork); + cancel_work_sync(&priv->s_iowait.iowork); del_timer_sync(&qp->s_timer); - iowait_sdma_drain(&qp->s_iowait); + iowait_sdma_drain(&priv->s_iowait); flush_tx_list(qp); remove_qp(dev, qp); wait_event(qp->wait, !atomic_read(&qp->refcount)); @@ -805,8 +811,8 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_ah_attr = attr->ah_attr; qp->s_srate = attr->ah_attr.static_rate; qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); - qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); - qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc); + priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); + priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); } if (attr_mask & IB_QP_ALT_PATH) { @@ -821,8 +827,8 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->port_num = qp->alt_ah_attr.port_num; qp->s_pkey_index = qp->s_alt_pkey_index; qp->s_flags |= HFI1_S_AHG_CLEAR; - qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); - qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc); + priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); + priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); } } @@ -1031,6 +1037,7 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, struct ib_udata *udata) { struct hfi1_qp *qp; + struct hfi1_qp_priv *priv; int err; struct hfi1_swqe *swq = NULL; struct hfi1_ibdev *dev; @@ -1098,11 +1105,18 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); - qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); - if (!qp->s_hdr) { + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp_priv; + } + priv->owner = qp; + priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), GFP_KERNEL); + if (!priv->s_hdr) { ret = ERR_PTR(-ENOMEM); goto bail_qp; } + qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); @@ -1245,7 +1259,9 @@ bail_ip: vfree(qp->r_rq.wq); free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num); bail_qp: - kfree(qp->s_hdr); + kfree(priv->s_hdr); + kfree(priv); +bail_qp_priv: kfree(qp); bail_swq: vfree(swq); @@ -1266,6 +1282,7 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) { struct hfi1_qp *qp = to_iqp(ibqp); struct hfi1_ibdev *dev = to_idev(ibqp->device); + struct hfi1_qp_priv *priv = qp->priv; /* Make sure HW and driver activity is stopped. 
*/ spin_lock_irq(&qp->r_lock); @@ -1276,9 +1293,9 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT); spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); - cancel_work_sync(&qp->s_iowait.iowork); + cancel_work_sync(&priv->s_iowait.iowork); del_timer_sync(&qp->s_timer); - iowait_sdma_drain(&qp->s_iowait); + iowait_sdma_drain(&priv->s_iowait); flush_tx_list(qp); remove_qp(dev, qp); wait_event(qp->wait, !atomic_read(&qp->refcount)); @@ -1301,7 +1318,8 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) else vfree(qp->r_rq.wq); vfree(qp->s_wq); - kfree(qp->s_hdr); + kfree(priv->s_hdr); + kfree(priv); kfree(qp); return 0; } @@ -1422,11 +1440,13 @@ static int iowait_sleep( { struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq); struct hfi1_qp *qp; + struct hfi1_qp_priv *priv; unsigned long flags; int ret = 0; struct hfi1_ibdev *dev; qp = tx->qp; + priv = qp->priv; spin_lock_irqsave(&qp->s_lock, flags); if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) { @@ -1442,13 +1462,13 @@ static int iowait_sleep( write_seqlock(&dev->iowait_lock); if (sdma_progress(sde, seq, stx)) goto eagain; - if (list_empty(&qp->s_iowait.list)) { + if (list_empty(&priv->s_iowait.list)) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); ibp->n_dmawait++; qp->s_flags |= HFI1_S_WAIT_DMA_DESC; - list_add_tail(&qp->s_iowait.list, &sde->dmawait); + list_add_tail(&priv->s_iowait.list, &sde->dmawait); trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC); atomic_inc(&qp->refcount); } @@ -1470,7 +1490,7 @@ eagain: static void iowait_wakeup(struct iowait *wait, int reason) { - struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait); + struct hfi1_qp *qp = iowait_to_qp(wait); WARN_ON(reason != SDMA_AVAIL_REASON); hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC); @@ -1651,9 +1671,10 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) { struct hfi1_swqe *wqe; struct hfi1_qp *qp = iter->qp; + struct hfi1_qp_priv *priv = qp->priv; struct sdma_engine *sde; - sde = qp_to_sdma_engine(qp, qp->s_sc); + sde = qp_to_sdma_engine(qp, priv->s_sc); wqe = get_swqe_ptr(qp, qp->s_last); seq_printf(s, "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n", @@ -1666,8 +1687,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe ? wqe->wr.opcode : 0, qp->s_hdrwords, qp->s_flags, - atomic_read(&qp->s_iowait.sdma_busy), - !list_empty(&qp->s_iowait.list), + atomic_read(&priv->s_iowait.sdma_busy), + !list_empty(&priv->s_iowait.list), qp->timeout, wqe ? 
wqe->ssn : 0, qp->s_lsn, @@ -1706,6 +1727,7 @@ void qp_comm_est(struct hfi1_qp *qp) */ void hfi1_migrate_qp(struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; struct ib_event ev; qp->s_mig_state = IB_MIG_MIGRATED; @@ -1713,8 +1735,8 @@ void hfi1_migrate_qp(struct hfi1_qp *qp) qp->port_num = qp->alt_ah_attr.port_num; qp->s_pkey_index = qp->s_alt_pkey_index; qp->s_flags |= HFI1_S_AHG_CLEAR; - qp->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); - qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc); + priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); + priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); ev.device = qp->ibqp.device; ev.element.qp = &qp->ibqp; diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 19b16a9..474c838 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -123,10 +123,12 @@ static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, */ static inline void clear_ahg(struct hfi1_qp *qp) { - qp->s_hdr->ahgcount = 0; + struct hfi1_qp_priv *priv = qp->priv; + + priv->s_hdr->ahgcount = 0; qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR); - if (qp->s_sde && qp->s_ahgidx >= 0) - sdma_ahg_free(qp->s_sde, qp->s_ahgidx); + if (priv->s_sde && qp->s_ahgidx >= 0) + sdma_ahg_free(priv->s_sde, qp->s_ahgidx); qp->s_ahgidx = -1; } @@ -257,14 +259,15 @@ void qp_comm_est(struct hfi1_qp *qp); */ static inline void _hfi1_schedule_send(struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); - iowait_schedule(&qp->s_iowait, ppd->hfi1_wq, - qp->s_sde ? - qp->s_sde->cpu : + iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, + priv->s_sde ? + priv->s_sde->cpu : cpumask_first(cpumask_of_node(dd->assigned_node_id))); } diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 6f4a155..70d5bd1 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -259,6 +259,7 @@ bail: */ int hfi1_make_rc_req(struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); struct hfi1_other_headers *ohdr; struct hfi1_sge_state *ss; @@ -275,9 +276,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) int middle = 0; int delta; - ohdr = &qp->s_hdr->ibh.u.oth; + ohdr = &priv->s_hdr->ibh.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr->ibh.u.l.oth; + ohdr = &priv->s_hdr->ibh.u.l.oth; /* * The lock is needed to synchronize between the sending tasklet, @@ -297,7 +298,7 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) if (qp->s_last == qp->s_head) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&qp->s_iowait.sdma_busy)) { + if (atomic_read(&priv->s_iowait.sdma_busy)) { qp->s_flags |= HFI1_S_WAIT_DMA; goto bail; } @@ -1620,7 +1621,9 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, static inline void rc_cancel_ack(struct hfi1_qp *qp) { - qp->r_adefered = 0; + struct hfi1_qp_priv *priv = qp->priv; + + priv->r_adefered = 0; if (list_empty(&qp->rspwait)) return; list_del_init(&qp->rspwait); @@ -2347,11 +2350,13 @@ send_last: qp->r_nak_state = 0; /* Send an ACK if requested or required. 
*/ if (psn & IB_BTH_REQ_ACK) { + struct hfi1_qp_priv *priv = qp->priv; + if (packet->numpkt == 0) { rc_cancel_ack(qp); goto send_ack; } - if (qp->r_adefered >= HFI1_PSN_CREDIT) { + if (priv->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2359,7 +2364,7 @@ send_last: rc_cancel_ack(qp); goto send_ack; } - qp->r_adefered++; + priv->r_adefered++; rc_defered_ack(rcd, qp); } return; diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index ea5efa4..55ed00d 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -692,27 +692,28 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, */ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn) { + struct hfi1_qp_priv *priv = qp->priv; if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR)) clear_ahg(qp); if (!(qp->s_flags & HFI1_S_AHG_VALID)) { /* first middle that needs copy */ if (qp->s_ahgidx < 0) - qp->s_ahgidx = sdma_ahg_alloc(qp->s_sde); + qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); if (qp->s_ahgidx >= 0) { qp->s_ahgpsn = npsn; - qp->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY; + priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY; /* save to protect a change in another thread */ - qp->s_hdr->sde = qp->s_sde; - qp->s_hdr->ahgidx = qp->s_ahgidx; + priv->s_hdr->sde = priv->s_sde; + priv->s_hdr->ahgidx = qp->s_ahgidx; qp->s_flags |= HFI1_S_AHG_VALID; } } else { /* subsequent middle after valid */ if (qp->s_ahgidx >= 0) { - qp->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG; - qp->s_hdr->ahgidx = qp->s_ahgidx; - qp->s_hdr->ahgcount++; - qp->s_hdr->ahgdesc[0] = + priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG; + priv->s_hdr->ahgidx = qp->s_ahgidx; + priv->s_hdr->ahgcount++; + priv->s_hdr->ahgdesc[0] = sdma_build_ahg_descriptor( (__force u16)cpu_to_be16((u16)npsn), BTH2_OFFSET, @@ -720,8 +721,8 @@ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn) 16); if ((npsn & 0xffff0000) != (qp->s_ahgpsn & 0xffff0000)) { - qp->s_hdr->ahgcount++; - qp->s_hdr->ahgdesc[1] = + priv->s_hdr->ahgcount++; + priv->s_hdr->ahgdesc[1] = sdma_build_ahg_descriptor( (__force u16)cpu_to_be16( (u16)(npsn >> 16)), @@ -737,6 +738,7 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, u32 bth0, u32 bth2, int middle) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_qp_priv *priv = qp->priv; u16 lrh0; u32 nwords; u32 extra_bytes; @@ -747,13 +749,13 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = HFI1_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh, - &qp->remote_ah_attr.grh, - qp->s_hdrwords, nwords); + qp->s_hdrwords += hfi1_make_grh(ibp, &priv->s_hdr->ibh.u.l.grh, + &qp->remote_ah_attr.grh, + qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; middle = 0; } - lrh0 |= (qp->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; + lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; /* * reset s_hdr/AHG fields * @@ -765,10 +767,10 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, * build_ahg() will modify as appropriate * to use the AHG feature. 
*/ - qp->s_hdr->tx_flags = 0; - qp->s_hdr->ahgcount = 0; - qp->s_hdr->ahgidx = 0; - qp->s_hdr->sde = NULL; + priv->s_hdr->tx_flags = 0; + priv->s_hdr->ahgcount = 0; + priv->s_hdr->ahgidx = 0; + priv->s_hdr->sde = NULL; if (qp->s_mig_state == IB_MIG_MIGRATED) bth0 |= IB_BTH_MIG_REQ; else @@ -777,11 +779,11 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, build_ahg(qp, bth2); else qp->s_flags &= ~HFI1_S_AHG_VALID; - qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr->ibh.lrh[2] = + priv->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0); + priv->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + priv->s_hdr->ibh.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr->ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + priv->s_hdr->ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | qp->remote_ah_attr.src_path_bits); bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; @@ -810,7 +812,7 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, void hfi1_do_send(struct work_struct *work) { struct iowait *wait = container_of(work, struct iowait, iowork); - struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait); + struct hfi1_qp *qp = iowait_to_qp(wait); struct hfi1_pkt_state ps; int (*make_req)(struct hfi1_qp *qp); unsigned long flags; diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 4f2a788..1908a28 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -63,6 +63,7 @@ */ int hfi1_make_uc_req(struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; struct hfi1_swqe *wqe; unsigned long flags; @@ -82,7 +83,7 @@ int hfi1_make_uc_req(struct hfi1_qp *qp) if (qp->s_last == qp->s_head) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&qp->s_iowait.sdma_busy)) { + if (atomic_read(&priv->s_iowait.sdma_busy)) { qp->s_flags |= HFI1_S_WAIT_DMA; goto bail; } @@ -92,9 +93,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp) goto done; } - ohdr = &qp->s_hdr->ibh.u.oth; + ohdr = &priv->s_hdr->ibh.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr->ibh.u.l.oth; + ohdr = &priv->s_hdr->ibh.u.l.oth; /* Get the next send request. */ wqe = get_swqe_ptr(qp, qp->s_cur); diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 970d42f..00d1ae7 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -264,6 +264,7 @@ drop: */ int hfi1_make_ud_req(struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct hfi1_pportdata *ppd; @@ -288,7 +289,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) if (qp->s_last == qp->s_head) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&qp->s_iowait.sdma_busy)) { + if (atomic_read(&priv->s_iowait.sdma_busy)) { qp->s_flags |= HFI1_S_WAIT_DMA; goto bail; } @@ -322,7 +323,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) * Instead of waiting, we could queue a * zero length descriptor so we get a callback. */ - if (atomic_read(&qp->s_iowait.sdma_busy)) { + if (atomic_read(&priv->s_iowait.sdma_busy)) { qp->s_flags |= HFI1_S_WAIT_DMA; goto bail; } @@ -353,11 +354,11 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) if (ah_attr->ah_flags & IB_AH_GRH) { /* Header size in 32-bit words. 
*/ - qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh, + qp->s_hdrwords += hfi1_make_grh(ibp, &priv->s_hdr->ibh.u.l.grh, &ah_attr->grh, qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; - ohdr = &qp->s_hdr->ibh.u.l.oth; + ohdr = &priv->s_hdr->ibh.u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -365,7 +366,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) } else { /* Header size in 32-bit words. */ lrh0 = HFI1_LRH_BTH; - ohdr = &qp->s_hdr->ibh.u.oth; + ohdr = &priv->s_hdr->ibh.u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -377,25 +378,25 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) lrh0 |= (ah_attr->sl & 0xf) << 4; if (qp->ibqp.qp_type == IB_QPT_SMI) { lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ - qp->s_sc = 0xf; + priv->s_sc = 0xf; } else { lrh0 |= (sc5 & 0xf) << 12; - qp->s_sc = sc5; + priv->s_sc = sc5; } - qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc); - qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr->ibh.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - qp->s_hdr->ibh.lrh[2] = + priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); + priv->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0); + priv->s_hdr->ibh.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + priv->s_hdr->ibh.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) - qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE; + priv->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE; else { lid = ppd->lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); - qp->s_hdr->ibh.lrh[3] = cpu_to_be16(lid); + priv->s_hdr->ibh.lrh[3] = cpu_to_be16(lid); } else - qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE; + priv->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; @@ -415,10 +416,10 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); /* disarm any ahg */ - qp->s_hdr->ahgcount = 0; - qp->s_hdr->ahgidx = 0; - qp->s_hdr->tx_flags = 0; - qp->s_hdr->sde = NULL; + priv->s_hdr->ahgcount = 0; + priv->s_hdr->ahgidx = 0; + priv->s_hdr->tx_flags = 0; + priv->s_hdr->sde = NULL; done: ret = 1; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index dc846d5..b8c6f74 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -486,6 +486,7 @@ static int post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct hfi1_qp *qp = to_iqp(ibqp); + struct hfi1_qp_priv *priv = qp->priv; int err = 0; int call_send; unsigned long flags; @@ -515,7 +516,7 @@ bail: if (nreq && !call_send) _hfi1_schedule_send(qp); if (nreq && call_send) - hfi1_do_send(&qp->s_iowait.iowork); + hfi1_do_send(&priv->s_iowait.iowork); return err; } @@ -698,12 +699,14 @@ static void mem_timer(unsigned long data) struct hfi1_qp *qp = NULL; struct iowait *wait; unsigned long flags; + struct hfi1_qp_priv *priv; write_seqlock_irqsave(&dev->iowait_lock, flags); if (!list_empty(list)) { wait = list_first_entry(list, struct iowait, list); - qp = container_of(wait, struct hfi1_qp, s_iowait); - list_del_init(&qp->s_iowait.list); + qp = iowait_to_qp(wait); + priv = qp->priv; + list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); @@ -738,6 +741,7 @@ void update_sge(struct hfi1_sge_state *ss, u32 length) static noinline struct 
verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; struct verbs_txreq *tx; unsigned long flags; @@ -746,10 +750,10 @@ static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, spin_lock_irqsave(&qp->s_lock, flags); write_seqlock(&dev->iowait_lock); if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK && - list_empty(&qp->s_iowait.list)) { + list_empty(&priv->s_iowait.list)) { dev->n_txwait++; qp->s_flags |= HFI1_S_WAIT_TX; - list_add_tail(&qp->s_iowait.list, &dev->txwait); + list_add_tail(&priv->s_iowait.list, &dev->txwait); trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TX); atomic_inc(&qp->refcount); } @@ -783,6 +787,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx) struct hfi1_qp *qp; unsigned long flags; unsigned int seq; + struct hfi1_qp_priv *priv; qp = tx->qp; dev = to_idev(qp->ibqp.device); @@ -805,8 +810,9 @@ void hfi1_put_txreq(struct verbs_txreq *tx) /* Wake up first QP wanting a free struct */ wait = list_first_entry(&dev->txwait, struct iowait, list); - qp = container_of(wait, struct hfi1_qp, s_iowait); - list_del_init(&qp->s_iowait.list); + qp = iowait_to_qp(wait); + priv = qp->priv; + list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ write_sequnlock_irqrestore(&dev->iowait_lock, flags); hfi1_qp_wakeup(qp, HFI1_S_WAIT_TX); @@ -856,17 +862,18 @@ static void verbs_sdma_complete( static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; unsigned long flags; int ret = 0; spin_lock_irqsave(&qp->s_lock, flags); if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) { write_seqlock(&dev->iowait_lock); - if (list_empty(&qp->s_iowait.list)) { + if (list_empty(&priv->s_iowait.list)) { if (list_empty(&dev->memwait)) mod_timer(&dev->mem_timer, jiffies + 1); qp->s_flags |= HFI1_S_WAIT_KMEM; - list_add_tail(&qp->s_iowait.list, &dev->memwait); + list_add_tail(&priv->s_iowait.list, &dev->memwait); trace_hfi1_qpsleep(qp, HFI1_S_WAIT_KMEM); atomic_inc(&qp->refcount); } @@ -1004,7 +1011,8 @@ bail_txadd: int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { - struct ahg_ib_header *ahdr = qp->s_hdr; + struct hfi1_qp_priv *priv = qp->priv; + struct ahg_ib_header *ahdr = priv->s_hdr; u32 hdrwords = qp->s_hdrwords; struct hfi1_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; @@ -1014,17 +1022,18 @@ int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, struct verbs_txreq *tx; struct sdma_txreq *stx; u64 pbc_flags = 0; - u8 sc5 = qp->s_sc; + u8 sc5 = priv->s_sc; + int ret; - if (!list_empty(&qp->s_iowait.tx_head)) { + if (!list_empty(&priv->s_iowait.tx_head)) { stx = list_first_entry( - &qp->s_iowait.tx_head, + &priv->s_iowait.tx_head, struct sdma_txreq, list); list_del_init(&stx->list); tx = container_of(stx, struct verbs_txreq, txreq); - ret = sdma_send_txreq(tx->sde, &qp->s_iowait, stx); + ret = sdma_send_txreq(tx->sde, &priv->s_iowait, stx); if (unlikely(ret == -ECOMM)) goto bail_ecomm; return ret; @@ -1034,7 +1043,7 @@ int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, if (IS_ERR(tx)) goto bail_tx; - tx->sde = qp->s_sde; + tx->sde = priv->s_sde; if (likely(pbc == 0)) { u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); @@ -1053,7 +1062,7 @@ int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, if (unlikely(ret)) goto bail_build; trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh); - ret = sdma_send_txreq(tx->sde, &qp->s_iowait, &tx->txreq); + ret = 
sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); if (unlikely(ret == -ECOMM)) goto bail_ecomm; return ret; @@ -1075,6 +1084,7 @@ bail_tx: */ static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc) { + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; struct hfi1_ibdev *dev = &dd->verbs_dev; unsigned long flags; @@ -1089,14 +1099,14 @@ static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc) spin_lock_irqsave(&qp->s_lock, flags); if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) { write_seqlock(&dev->iowait_lock); - if (list_empty(&qp->s_iowait.list)) { + if (list_empty(&priv->s_iowait.list)) { struct hfi1_ibdev *dev = &dd->verbs_dev; int was_empty; dev->n_piowait++; qp->s_flags |= HFI1_S_WAIT_PIO; was_empty = list_empty(&sc->piowait); - list_add_tail(&qp->s_iowait.list, &sc->piowait); + list_add_tail(&priv->s_iowait.list, &sc->piowait); trace_hfi1_qpsleep(qp, HFI1_S_WAIT_PIO); atomic_inc(&qp->refcount); /* counting: only call wantpiobuf_intr if first user */ @@ -1126,7 +1136,8 @@ struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5) int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { - struct ahg_ib_header *ahdr = qp->s_hdr; + struct hfi1_qp_priv *priv = qp->priv; + struct ahg_ib_header *ahdr = priv->s_hdr; u32 hdrwords = qp->s_hdrwords; struct hfi1_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; @@ -1142,7 +1153,7 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, int wc_status = IB_WC_SUCCESS; /* vl15 special case taken care of in ud.c */ - sc5 = qp->s_sc; + sc5 = priv->s_sc; sc = qp_to_send_context(qp, sc5); if (!sc) @@ -1249,11 +1260,12 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd, struct hfi1_ib_header *hdr, struct hfi1_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; struct hfi1_devdata *dd; int i = 0; u16 pkey; - u8 lnh, sc5 = qp->s_sc; + u8 lnh, sc5 = priv->s_sc; if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT)) return 0; @@ -1312,7 +1324,8 @@ bad: int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); - struct ahg_ib_header *ahdr = qp->s_hdr; + struct hfi1_qp_priv *priv = qp->priv; + struct ahg_ib_header *ahdr = priv->s_hdr; int ret; int pio = 0; unsigned long flags = 0; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 14aa81c..dc5aa9a 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -387,6 +387,20 @@ struct hfi1_ack_entry { }; /* + * hfi1 specific data structures that will be hidden from rvt after the queue + * pair is made common + */ +struct hfi1_qp; +struct hfi1_qp_priv { + struct ahg_ib_header *s_hdr; /* next packet header to send */ + struct sdma_engine *s_sde; /* current sde */ + u8 s_sc; /* SC[0..4] for next packet */ + u8 r_adefered; /* number of acks defered */ + struct iowait s_iowait; + struct hfi1_qp *owner; +}; + +/* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). * Variables prefixed with ack_ are for responder replies. 
@@ -396,16 +410,13 @@ struct hfi1_ack_entry { */ struct hfi1_qp { struct ib_qp ibqp; + void *priv; /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; struct hfi1_qp __rcu *next; /* link list for QPN hash table */ struct hfi1_swqe *s_wq; /* send work queue */ struct hfi1_mmap_info *ip; - struct ahg_ib_header *s_hdr; /* next packet header to send */ - struct sdma_engine *s_sde; /* current sde */ - /* sc for UC/RC QPs - based on ah for UD */ - u8 s_sc; unsigned long timeout_jiffies; /* computed from timeout */ enum ib_mtu path_mtu; @@ -453,7 +464,6 @@ struct hfi1_qp { u32 r_psn; /* expected rcv packet sequence number */ u32 r_msn; /* message sequence number */ - u8 r_adefered; /* number of acks defered */ u8 r_state; /* opcode of last packet received */ u8 r_flags; u8 r_head_ack_queue; /* index into s_ack_queue[] */ @@ -501,8 +511,6 @@ struct hfi1_qp { struct hfi1_sge_state s_ack_rdma_sge; struct timer_list s_timer; - struct iowait s_iowait; - struct hfi1_sge r_sg_list[0] /* verified SGEs */ ____cacheline_aligned_in_smp; }; @@ -794,6 +802,14 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev) return container_of(rdi, struct hfi1_ibdev, rdi); } +static inline struct hfi1_qp *iowait_to_qp(struct iowait *s_iowait) +{ + struct hfi1_qp_priv *priv; + + priv = container_of(s_iowait, struct hfi1_qp_priv, s_iowait); + return priv->owner; +} + /* * Send if not busy or waiting for I/O and either * a RC response is pending or we can process send work requests. -- cgit v0.10.2 From 49dbb6cf1739e1eefa7ed3849430144bb817fdc6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:06 -0800 Subject: staging/rdma/hfi1: Add device specific info prints Implement get_card_name and get_pci_dev helper functions for rdmavt for hfi1. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index d096f11..62ee03e 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -162,6 +162,22 @@ const char *get_unit_name(int unit) return iname; } +const char *get_card_name(struct rvt_dev_info *rdi) +{ + struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi); + struct hfi1_devdata *dd = container_of(ibdev, + struct hfi1_devdata, verbs_dev); + return get_unit_name(dd->unit); +} + +struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi) +{ + struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi); + struct hfi1_devdata *dd = container_of(ibdev, + struct hfi1_devdata, verbs_dev); + return dd->pcidev; +} + /* * Return count of units with at least one port ACTIVE. 
*/ diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index dbea286..cff966e 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1660,6 +1660,8 @@ int get_platform_config_field(struct hfi1_devdata *dd, int table_index, int field_index, u32 *data, u32 len); const char *get_unit_name(int unit); +const char *get_card_name(struct rvt_dev_info *rdi); +struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi); /* * Flush write combining store buffers (if present) and perform a write diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index b8c6f74..ccd91da 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -2031,6 +2031,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) * Fill in rvt info object. */ dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; + dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name; + dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; dd->verbs_dev.rdi.dparms.props.max_pd = hfi1_max_pds; dd->verbs_dev.rdi.flags = (RVT_FLAG_MR_INIT_DRIVER | RVT_FLAG_QP_INIT_DRIVER | -- cgit v0.10.2 From 8859b4a6d08bcbd804459274c6f4134aaf6ace8a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:11 -0800 Subject: staging/rdma/hfi1: Use correct rdmavt header files after move. Rdmavt split the header files to be based on ibta object. This patch makes changes in hfi1 to account for the move. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 62ee03e..aa309a5 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "hfi.h" #include "trace.h" @@ -316,7 +317,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* Get the destination QP number. */ qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; - if (lid < HFI1_MULTICAST_LID_BASE) { + if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { struct hfi1_qp *qp; unsigned long flags; diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index ed88a5a..1399485 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -1097,7 +1097,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* Must be a valid unicast LID address. */ if ((lid == 0 && ls_old > IB_PORT_INIT) || - lid >= HFI1_MULTICAST_LID_BASE) { + lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n", lid); @@ -1130,7 +1130,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* Must be a valid unicast LID address. 
*/ if ((smlid == 0 && ls_old > IB_PORT_INIT) || - smlid >= HFI1_MULTICAST_LID_BASE) { + smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid); } else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) { diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index cacef55..735253b 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -640,7 +640,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_AV) { u8 sc; - if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE) + if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr)) goto inval; @@ -653,7 +653,8 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_ALT_PATH) { u8 sc; - if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE) + if (attr->alt_ah_attr.dlid >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) goto inval; diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 00d1ae7..55a4eec 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -242,7 +242,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); /* Check for loopback when the port lid is not set */ if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI) - wc.slid = HFI1_PERMISSIVE_LID; + wc.slid = be16_to_cpu(IB_LID_PERMISSIVE); wc.sl = ah_attr->sl; wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); wc.port_num = qp->port_num; @@ -310,11 +310,11 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &to_iah(wqe->ud_wr.ah)->attr; - if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE || - ah_attr->dlid == HFI1_PERMISSIVE_LID) { + if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) || + ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(!loopback && (lid == ppd->lid || - (lid == HFI1_PERMISSIVE_LID && + (lid == be16_to_cpu(IB_LID_PERMISSIVE) && qp->ibqp.qp_type == IB_QPT_GSI)))) { /* * If DMAs are in progress, we can't generate @@ -666,8 +666,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qkey = be32_to_cpu(ohdr->u.ud.deth[0]); src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK; dlid = be16_to_cpu(hdr->lrh[1]); - is_mcast = (dlid > HFI1_MULTICAST_LID_BASE) && - (dlid != HFI1_PERMISSIVE_LID); + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); bth1 = be32_to_cpu(ohdr->bth[1]); if (unlikely(bth1 & HFI1_BECN_SMASK)) { /* @@ -874,7 +874,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) /* * Save the LMC lower bits if the destination LID is a unicast LID. */ - wc.dlid_path_bits = dlid >= HFI1_MULTICAST_LID_BASE ? 0 : + wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 : dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index ccd91da..5c952d8 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -646,8 +646,8 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) /* Get the destination QP number. 
*/ qp_num = be32_to_cpu(packet->ohdr->bth[1]) & HFI1_QPN_MASK; lid = be16_to_cpu(hdr->lrh[1]); - if (unlikely((lid >= HFI1_MULTICAST_LID_BASE) && - (lid != HFI1_PERMISSIVE_LID))) { + if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { struct hfi1_mcast *mcast; struct hfi1_mcast_qp *p; -- cgit v0.10.2 From 15723f06fb9d80cbfd895c32c6023881c7d0e0b4 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:17 -0800 Subject: staging/rdma/hfi1: Use address handle in rdmavt and remove from hfi1 Original patch from Kamal Heib , split apart from original and modified to accomodate recent changes in rdmavt. Remove AH from hfi1 and use rdmavt version. Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h index e4b1dc6..cb5ca79 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/staging/rdma/hfi1/common.h @@ -341,7 +341,6 @@ struct hfi1_message_header { #define FULL_MGMT_P_KEY 0xFFFF #define DEFAULT_P_KEY LIM_MGMT_P_KEY -#define HFI1_PERMISSIVE_LID 0xFFFF #define HFI1_AETH_CREDIT_SHIFT 24 #define HFI1_AETH_CREDIT_MASK 0x1F #define HFI1_AETH_CREDIT_INVAL 0x1F @@ -353,7 +352,6 @@ struct hfi1_message_header { #define HFI1_BECN_SHIFT 30 #define HFI1_BECN_MASK 1 #define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) -#define HFI1_MULTICAST_LID_BASE 0xC000 static inline __u64 rhf_to_cpu(const __le32 *rbuf) { diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 1399485..1190f8d 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -137,7 +137,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) ret = PTR_ERR(ah); else { send_buf->ah = ah; - ibp->sm_ah = to_iah(ah); + ibp->sm_ah = ibah_to_rvtah(ah); ret = 0; } } else diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 735253b..9fcf052 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -424,7 +424,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); + atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; } @@ -642,7 +642,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; - if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr)) + if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) goto inval; sc = ah_to_sc(ibqp->device, &attr->ah_attr); if (!qp_to_sdma_engine(qp, sc) && @@ -656,7 +656,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr->alt_ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; - if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) + if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) goto inval; if (attr->alt_pkey_index >= hfi1_get_npkeys(dd)) goto inval; diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 55ed00d..33bcfe5 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -893,7 +893,7 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe, if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); 
+ atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 55a4eec..820fef2 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -98,7 +98,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) goto drop; } - ah_attr = &to_iah(swqe->ud_wr.ah)->attr; + ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr; ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -309,7 +309,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) /* Construct the header. */ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &to_iah(wqe->ud_wr.ah)->attr; + ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) || ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 5c952d8..021e211 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -455,9 +455,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) if (wqe->length > 0x80000000U) goto bail_inval_free; } else { - struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah); - - atomic_inc(&ah->refcount); + atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); } wqe->ssn = qp->s_ssn++; qp->s_head = next; @@ -1615,88 +1613,21 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah) return ibp->sl_to_sc[ah->sl]; } -int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) +static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) { struct hfi1_ibport *ibp; struct hfi1_pportdata *ppd; struct hfi1_devdata *dd; u8 sc5; - /* A multicast address requires a GRH (see ch. 8.4.1). */ - if (ah_attr->dlid >= HFI1_MULTICAST_LID_BASE && - ah_attr->dlid != HFI1_PERMISSIVE_LID && - !(ah_attr->ah_flags & IB_AH_GRH)) - goto bail; - if ((ah_attr->ah_flags & IB_AH_GRH) && - ah_attr->grh.sgid_index >= HFI1_GUIDS_PER_PORT) - goto bail; - if (ah_attr->dlid == 0) - goto bail; - if (ah_attr->port_num < 1 || - ah_attr->port_num > ibdev->phys_port_cnt) - goto bail; - if (ah_attr->static_rate != IB_RATE_PORT_CURRENT && - ib_rate_to_mbps(ah_attr->static_rate) < 0) - goto bail; - if (ah_attr->sl >= OPA_MAX_SLS) - goto bail; /* test the mapping for validity */ ibp = to_iport(ibdev, ah_attr->port_num); ppd = ppd_from_ibp(ibp); sc5 = ibp->sl_to_sc[ah_attr->sl]; dd = dd_from_ppd(ppd); if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) - goto bail; + return -EINVAL; return 0; -bail: - return -EINVAL; -} - -/** - * create_ah - create an address handle - * @pd: the protection domain - * @ah_attr: the attributes of the AH - * - * This may be called from interrupt context. 
- */ -static struct ib_ah *create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) -{ - struct hfi1_ah *ah; - struct ib_ah *ret; - struct hfi1_ibdev *dev = to_idev(pd->device); - unsigned long flags; - - if (hfi1_check_ah(pd->device, ah_attr)) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - ah = kmalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - spin_lock_irqsave(&dev->n_ahs_lock, flags); - if (dev->n_ahs_allocated == hfi1_max_ahs) { - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - kfree(ah); - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - dev->n_ahs_allocated++; - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - - /* ib_create_ah() will initialize ah->ibah. */ - ah->attr = *ah_attr; - atomic_set(&ah->refcount, 0); - - ret = &ah->ibah; - -bail: - return ret; } struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) @@ -1717,51 +1648,6 @@ struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) } /** - * destroy_ah - destroy an address handle - * @ibah: the AH to destroy - * - * This may be called from interrupt context. - */ -static int destroy_ah(struct ib_ah *ibah) -{ - struct hfi1_ibdev *dev = to_idev(ibah->device); - struct hfi1_ah *ah = to_iah(ibah); - unsigned long flags; - - if (atomic_read(&ah->refcount) != 0) - return -EBUSY; - - spin_lock_irqsave(&dev->n_ahs_lock, flags); - dev->n_ahs_allocated--; - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - - kfree(ah); - - return 0; -} - -static int modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) -{ - struct hfi1_ah *ah = to_iah(ibah); - - if (hfi1_check_ah(ibah->device, ah_attr)) - return -EINVAL; - - ah->attr = *ah_attr; - - return 0; -} - -static int query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) -{ - struct hfi1_ah *ah = to_iah(ibah); - - *ah_attr = ah->attr; - - return 0; -} - -/** * hfi1_get_npkeys - return the size of the PKEY table for context 0 * @dd: the hfi1_ib device */ @@ -1879,7 +1765,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* Only need to initialize non-zero fields. 
*/ - spin_lock_init(&dev->n_ahs_lock); spin_lock_init(&dev->n_cqs_lock); spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); @@ -1989,10 +1874,10 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->dealloc_ucontext = dealloc_ucontext; ibdev->alloc_pd = NULL; ibdev->dealloc_pd = NULL; - ibdev->create_ah = create_ah; - ibdev->destroy_ah = destroy_ah; - ibdev->modify_ah = modify_ah; - ibdev->query_ah = query_ah; + ibdev->create_ah = NULL; + ibdev->destroy_ah = NULL; + ibdev->modify_ah = NULL; + ibdev->query_ah = NULL; ibdev->create_srq = hfi1_create_srq; ibdev->modify_srq = hfi1_modify_srq; ibdev->query_srq = hfi1_query_srq; @@ -2033,6 +1918,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name; dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; + dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; + dd->verbs_dev.rdi.dparms.props.max_ah = hfi1_max_ahs; dd->verbs_dev.rdi.dparms.props.max_pd = hfi1_max_pds; dd->verbs_dev.rdi.flags = (RVT_FLAG_MR_INIT_DRIVER | RVT_FLAG_QP_INIT_DRIVER | diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index dc5aa9a..4db6136 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -234,13 +234,6 @@ struct hfi1_mcast { int n_attached; }; -/* Address Handle */ -struct hfi1_ah { - struct ib_ah ibah; - struct ib_ah_attr attr; - atomic_t refcount; -}; - /* * This structure is used by hfi1_mmap() to validate an offset * when an mmap() request is made. The vm_area_struct then uses @@ -652,8 +645,8 @@ static inline void inc_opstats( struct hfi1_ibport { struct hfi1_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ - struct hfi1_ah *sm_ah; - struct hfi1_ah *smi_ah; + struct rvt_ah *sm_ah; + struct rvt_ah *smi_ah; struct rb_root mcast_tree; spinlock_t lock; /* protect changes in this struct */ @@ -735,8 +728,6 @@ struct hfi1_ibdev { u64 n_kmem_wait; u64 n_send_schedule; - u32 n_ahs_allocated; /* number of AHs allocated for device */ - spinlock_t n_ahs_lock; u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; u32 n_qps_allocated; /* number of QPs allocated for device */ @@ -774,11 +765,6 @@ static inline struct hfi1_mr *to_imr(struct ib_mr *ibmr) return container_of(ibmr, struct hfi1_mr, ibmr); } -static inline struct hfi1_ah *to_iah(struct ib_ah *ibah) -{ - return container_of(ibah, struct hfi1_ah, ibah); -} - static inline struct hfi1_cq *to_icq(struct ib_cq *ibcq) { return container_of(ibcq, struct hfi1_cq, ibcq); @@ -925,8 +911,6 @@ void hfi1_rc_hdrerr( u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); - struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); void hfi1_rc_rnr_retry(unsigned long arg); -- cgit v0.10.2 From 8f1764fa2ba5a39c651316998f40631e8492081d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:22 -0800 Subject: staging/rdma/hfi1: Implement hfi1 support for AH notification For OPA devices additional work is required to create an AH. This patch adds support to set the VL correctly. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 021e211..657efd3 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1630,6 +1630,29 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) return 0; } +static void hfi1_notify_new_ah(struct ib_device *ibdev, + struct ib_ah_attr *ah_attr, + struct rvt_ah *ah) +{ + struct hfi1_ibport *ibp; + struct hfi1_pportdata *ppd; + struct hfi1_devdata *dd; + u8 sc5; + + /* + * Do not trust reading anything from rvt_ah at this point as it is not + * done being setup. We can however modify things which we need to set. + */ + + ibp = to_iport(ibdev, ah_attr->port_num); + ppd = ppd_from_ibp(ibp); + sc5 = ibp->sl_to_sc[ah->attr.sl]; + dd = dd_from_ppd(ppd); + ah->vl = sc_to_vlt(dd, sc5); + if (ah->vl < num_vls || ah->vl == 15) + ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu); +} + struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) { struct ib_ah_attr attr; @@ -1919,6 +1942,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name; dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; + dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; dd->verbs_dev.rdi.dparms.props.max_ah = hfi1_max_ahs; dd->verbs_dev.rdi.dparms.props.max_pd = hfi1_max_pds; dd->verbs_dev.rdi.flags = (RVT_FLAG_MR_INIT_DRIVER | -- cgit v0.10.2 From 895420ddc8b35099ddd25132f5707306e70f0d6a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:28 -0800 Subject: staging/rdma/hfi1: Remove hfi1 MR and hfi1 specific qp type This patch does the actual removal of the queue pair from the hfi1 driver along with a number of dependent data structures. These were moved to rvt. It also removes the MR functions to use those in rdmavt. These two pieces can not reasonably be split apart becuase they depend on each other. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 69fb10f..d82d9dc 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := chip.o cq.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ - init.o intr.o keys.o mad.o mmap.o mr.o pcie.o pio.o pio_copy.o \ + init.o intr.o mad.o mmap.o pcie.o pio.o pio_copy.o \ qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs_mcast.o verbs.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/staging/rdma/hfi1/cq.c b/drivers/staging/rdma/hfi1/cq.c index 4f046ff..ffd0e7a 100644 --- a/drivers/staging/rdma/hfi1/cq.c +++ b/drivers/staging/rdma/hfi1/cq.c @@ -479,7 +479,7 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) if (cq->ip) { struct hfi1_ibdev *dev = to_idev(ibcq->device); - struct hfi1_mmap_info *ip = cq->ip; + struct rvt_mmap_info *ip = cq->ip; hfi1_update_mmap_info(dev, ip, sz, wc); diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index 15c616a..d9889d4 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -1603,7 +1603,7 @@ int snoop_recv_handler(struct hfi1_packet *packet) /* * Handle snooping and capturing packets when sdma is being used. */ -int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, +int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n"); @@ -1616,13 +1616,13 @@ int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, * bypass packets. The only way to send a bypass packet currently is to use the * diagpkt interface. When that interface is enable snoop/capture is not. */ -int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, +int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { struct hfi1_qp_priv *priv = qp->priv; struct ahg_ib_header *ahdr = priv->s_hdr; u32 hdrwords = qp->s_hdrwords; - struct hfi1_sge_state *ss = qp->s_cur_sge; + struct rvt_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; u32 dwords = (len + 3) >> 2; u32 plen = hdrwords + dwords + 2; /* includes pbc */ @@ -1630,7 +1630,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, struct snoop_packet *s_packet = NULL; u32 *hdr = (u32 *)&ahdr->ibh; u32 length = 0; - struct hfi1_sge_state temp_ss; + struct rvt_sge_state temp_ss; void *data = NULL; void *data_start = NULL; int ret; diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index aa309a5..eaed692 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -318,7 +318,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* Get the destination QP number. */ qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { - struct hfi1_qp *qp; + struct rvt_qp *qp; unsigned long flags; rcu_read_lock(); @@ -387,7 +387,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, * Only in pre-B0 h/w is the CNP_OPCODE handled * via this code path. 
*/ - struct hfi1_qp *qp = NULL; + struct rvt_qp *qp = NULL; u32 lqpn, rqpn; u16 rlid; u8 svc_type, sl, sc5; @@ -456,7 +456,7 @@ static void prescan_rxq(struct hfi1_packet *packet) {} #else /* !CONFIG_PRESCAN_RXQ */ static int prescan_receive_queue; -static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr, +static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, struct hfi1_other_headers *ohdr, u64 rhf, u32 bth1, struct ib_grh *grh) { @@ -595,7 +595,7 @@ static void prescan_rxq(struct hfi1_packet *packet) struct hfi1_ibport *ibp = &rcd->ppd->ibport_data; __le32 *rhf_addr = (__le32 *) rcd->rcvhdrq + mdata.ps_head + dd->rhf_offset; - struct hfi1_qp *qp; + struct rvt_qp *qp; struct hfi1_ib_header *hdr; struct hfi1_other_headers *ohdr; struct ib_grh *grh = NULL; @@ -770,7 +770,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd; - struct hfi1_qp *qp, *nqp; + struct rvt_qp *qp, *nqp; rcd = packet->rcd; rcd->head = packet->rhqoff; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index cff966e..d52dbda 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -334,7 +334,7 @@ struct hfi1_packet { void *hdr; struct hfi1_ctxtdata *rcd; __le32 *rhf_addr; - struct hfi1_qp *qp; + struct rvt_qp *qp; struct hfi1_other_headers *ohdr; u64 rhf; u32 maxcnt; @@ -374,7 +374,7 @@ struct hfi1_snoop_data { #define HFI1_PORT_SNOOP_MODE 1U #define HFI1_PORT_CAPTURE_MODE 2U -struct hfi1_sge_state; +struct rvt_sge_state; /* * Get/Set IB link-level config parameters for f_get/set_ib_cfg() @@ -1091,9 +1091,9 @@ struct hfi1_devdata { * Handlers for outgoing data so that snoop/capture does not * have to have its hooks in the send path */ - int (*process_pio_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, + int (*process_pio_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); - int (*process_dma_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, + int (*process_dma_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); @@ -1276,7 +1276,7 @@ static inline u32 egress_cycles(u32 len, u32 rate) void set_link_ipg(struct hfi1_pportdata *ppd); void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, u32 rqpn, u8 svc_type); -void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn, +void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); @@ -1468,9 +1468,9 @@ void reset_link_credits(struct hfi1_devdata *dd); void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu); int snoop_recv_handler(struct hfi1_packet *packet); -int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, +int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); -int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, +int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); @@ -1682,7 +1682,7 @@ int process_receive_invalid(struct hfi1_packet *packet); extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8]; -void update_sge(struct hfi1_sge_state *ss, u32 length); +void update_sge(struct rvt_sge_state *ss, u32 length); /* global module parameter variables */ extern 
unsigned int hfi1_max_mtu; diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c deleted file mode 100644 index ffaaa6f..0000000 --- a/drivers/staging/rdma/hfi1/keys.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include "hfi.h" - -/** - * hfi1_alloc_lkey - allocate an lkey - * @mr: memory region that this lkey protects - * @dma_region: 0->normal key, 1->restricted DMA key - * - * Returns 0 if successful, otherwise returns -errno. - * - * Increments mr reference count as required. - * - * Sets the lkey field mr for non-dma regions. 
- * - */ - -int hfi1_alloc_lkey(struct rvt_mregion *mr, int dma_region) -{ - unsigned long flags; - u32 r; - u32 n; - int ret = 0; - struct hfi1_ibdev *dev = to_idev(mr->pd->device); - struct rvt_lkey_table *rkt = &dev->lk_table; - - hfi1_get_mr(mr); - spin_lock_irqsave(&rkt->lock, flags); - - /* special case for dma_mr lkey == 0 */ - if (dma_region) { - struct rvt_mregion *tmr; - - tmr = rcu_access_pointer(dev->dma_mr); - if (!tmr) { - rcu_assign_pointer(dev->dma_mr, mr); - mr->lkey_published = 1; - } else { - hfi1_put_mr(mr); - } - goto success; - } - - /* Find the next available LKEY */ - r = rkt->next; - n = r; - for (;;) { - if (!rcu_access_pointer(rkt->table[r])) - break; - r = (r + 1) & (rkt->max - 1); - if (r == n) - goto bail; - } - rkt->next = (r + 1) & (rkt->max - 1); - /* - * Make sure lkey is never zero which is reserved to indicate an - * unrestricted LKEY. - */ - rkt->gen++; - /* - * bits are capped in verbs.c to ensure enough bits for - * generation number - */ - mr->lkey = (r << (32 - hfi1_lkey_table_size)) | - ((((1 << (24 - hfi1_lkey_table_size)) - 1) & rkt->gen) - << 8); - if (mr->lkey == 0) { - mr->lkey |= 1 << 8; - rkt->gen++; - } - rcu_assign_pointer(rkt->table[r], mr); - mr->lkey_published = 1; -success: - spin_unlock_irqrestore(&rkt->lock, flags); -out: - return ret; -bail: - hfi1_put_mr(mr); - spin_unlock_irqrestore(&rkt->lock, flags); - ret = -ENOMEM; - goto out; -} - -/** - * hfi1_free_lkey - free an lkey - * @mr: mr to free from tables - */ -void hfi1_free_lkey(struct rvt_mregion *mr) -{ - unsigned long flags; - u32 lkey = mr->lkey; - u32 r; - struct hfi1_ibdev *dev = to_idev(mr->pd->device); - struct rvt_lkey_table *rkt = &dev->lk_table; - int freed = 0; - - spin_lock_irqsave(&rkt->lock, flags); - if (!mr->lkey_published) - goto out; - if (lkey == 0) - RCU_INIT_POINTER(dev->dma_mr, NULL); - else { - r = lkey >> (32 - hfi1_lkey_table_size); - RCU_INIT_POINTER(rkt->table[r], NULL); - } - mr->lkey_published = 0; - freed++; -out: - spin_unlock_irqrestore(&rkt->lock, flags); - if (freed) { - synchronize_rcu(); - hfi1_put_mr(mr); - } -} - -/** - * hfi1_lkey_ok - check IB SGE for validity and initialize - * @rkt: table containing lkey to check SGE against - * @pd: protection domain - * @isge: outgoing internal SGE - * @sge: SGE to check - * @acc: access flags - * - * Return 1 if valid and successful, otherwise returns 0. - * - * increments the reference count upon success - * - * Check the IB SGE for validity and initialize our internal version - * of it. - */ -int hfi1_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct hfi1_sge *isge, struct ib_sge *sge, int acc) -{ - struct rvt_mregion *mr; - unsigned n, m; - size_t off; - - /* - * We use LKEY == zero for kernel virtual addresses - * (see hfi1_get_dma_mr and dma.c). 
- */ - rcu_read_lock(); - if (sge->lkey == 0) { - struct hfi1_ibdev *dev = to_idev(pd->ibpd.device); - - if (pd->user) - goto bail; - mr = rcu_dereference(dev->dma_mr); - if (!mr) - goto bail; - atomic_inc(&mr->refcount); - rcu_read_unlock(); - - isge->mr = mr; - isge->vaddr = (void *) sge->addr; - isge->length = sge->length; - isge->sge_length = sge->length; - isge->m = 0; - isge->n = 0; - goto ok; - } - mr = rcu_dereference( - rkt->table[(sge->lkey >> (32 - hfi1_lkey_table_size))]); - if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) - goto bail; - - off = sge->addr - mr->user_base; - if (unlikely(sge->addr < mr->user_base || - off + sge->length > mr->length || - (mr->access_flags & acc) != acc)) - goto bail; - atomic_inc(&mr->refcount); - rcu_read_unlock(); - - off += mr->offset; - if (mr->page_shift) { - /* - page sizes are uniform power of 2 so no loop is necessary - entries_spanned_by_off is the number of times the loop below - would have executed. - */ - size_t entries_spanned_by_off; - - entries_spanned_by_off = off >> mr->page_shift; - off -= (entries_spanned_by_off << mr->page_shift); - m = entries_spanned_by_off / RVT_SEGSZ; - n = entries_spanned_by_off % RVT_SEGSZ; - } else { - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= RVT_SEGSZ) { - m++; - n = 0; - } - } - } - isge->mr = mr; - isge->vaddr = mr->map[m]->segs[n].vaddr + off; - isge->length = mr->map[m]->segs[n].length - off; - isge->sge_length = sge->length; - isge->m = m; - isge->n = n; -ok: - return 1; -bail: - rcu_read_unlock(); - return 0; -} - -/** - * hfi1_rkey_ok - check the IB virtual address, length, and RKEY - * @qp: qp for validation - * @sge: SGE state - * @len: length of data - * @vaddr: virtual address to place data - * @rkey: rkey to check - * @acc: access flags - * - * Return 1 if successful, otherwise 0. - * - * increments the reference count upon success - */ -int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge, - u32 len, u64 vaddr, u32 rkey, int acc) -{ - struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct rvt_mregion *mr; - unsigned n, m; - size_t off; - - /* - * We use RKEY == zero for kernel virtual addresses - * (see hfi1_get_dma_mr and dma.c). - */ - rcu_read_lock(); - if (rkey == 0) { - struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd); - struct hfi1_ibdev *dev = to_idev(pd->ibpd.device); - - if (pd->user) - goto bail; - mr = rcu_dereference(dev->dma_mr); - if (!mr) - goto bail; - atomic_inc(&mr->refcount); - rcu_read_unlock(); - - sge->mr = mr; - sge->vaddr = (void *) vaddr; - sge->length = len; - sge->sge_length = len; - sge->m = 0; - sge->n = 0; - goto ok; - } - - mr = rcu_dereference( - rkt->table[(rkey >> (32 - hfi1_lkey_table_size))]); - if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) - goto bail; - - off = vaddr - mr->iova; - if (unlikely(vaddr < mr->iova || off + len > mr->length || - (mr->access_flags & acc) == 0)) - goto bail; - atomic_inc(&mr->refcount); - rcu_read_unlock(); - - off += mr->offset; - if (mr->page_shift) { - /* - page sizes are uniform power of 2 so no loop is necessary - entries_spanned_by_off is the number of times the loop below - would have executed. 
- */ - size_t entries_spanned_by_off; - - entries_spanned_by_off = off >> mr->page_shift; - off -= (entries_spanned_by_off << mr->page_shift); - m = entries_spanned_by_off / RVT_SEGSZ; - n = entries_spanned_by_off % RVT_SEGSZ; - } else { - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= RVT_SEGSZ) { - m++; - n = 0; - } - } - } - sge->mr = mr; - sge->vaddr = mr->map[m]->segs[n].vaddr + off; - sge->length = mr->map[m]->segs[n].length - off; - sge->sge_length = len; - sge->m = m; - sge->n = n; -ok: - return 1; -bail: - rcu_read_unlock(); - return 0; -} diff --git a/drivers/staging/rdma/hfi1/mmap.c b/drivers/staging/rdma/hfi1/mmap.c index 5173b1c..4ce6be6 100644 --- a/drivers/staging/rdma/hfi1/mmap.c +++ b/drivers/staging/rdma/hfi1/mmap.c @@ -59,12 +59,12 @@ /** * hfi1_release_mmap_info - free mmap info structure - * @ref: a pointer to the kref within struct hfi1_mmap_info + * @ref: a pointer to the kref within struct rvt_mmap_info */ void hfi1_release_mmap_info(struct kref *ref) { - struct hfi1_mmap_info *ip = - container_of(ref, struct hfi1_mmap_info, ref); + struct rvt_mmap_info *ip = + container_of(ref, struct rvt_mmap_info, ref); struct hfi1_ibdev *dev = to_idev(ip->context->device); spin_lock_irq(&dev->pending_lock); @@ -81,14 +81,14 @@ void hfi1_release_mmap_info(struct kref *ref) */ static void hfi1_vma_open(struct vm_area_struct *vma) { - struct hfi1_mmap_info *ip = vma->vm_private_data; + struct rvt_mmap_info *ip = vma->vm_private_data; kref_get(&ip->ref); } static void hfi1_vma_close(struct vm_area_struct *vma) { - struct hfi1_mmap_info *ip = vma->vm_private_data; + struct rvt_mmap_info *ip = vma->vm_private_data; kref_put(&ip->ref, hfi1_release_mmap_info); } @@ -109,7 +109,7 @@ int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) struct hfi1_ibdev *dev = to_idev(context->device); unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long size = vma->vm_end - vma->vm_start; - struct hfi1_mmap_info *ip, *pp; + struct rvt_mmap_info *ip, *pp; int ret = -EINVAL; /* @@ -146,11 +146,11 @@ done: /* * Allocate information for hfi1_mmap */ -struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, - u32 size, - struct ib_ucontext *context, - void *obj) { - struct hfi1_mmap_info *ip; +struct rvt_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj) { + struct rvt_mmap_info *ip; ip = kmalloc(sizeof(*ip), GFP_KERNEL); if (!ip) @@ -175,7 +175,7 @@ bail: return ip; } -void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip, +void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct rvt_mmap_info *ip, u32 size, void *obj) { size = PAGE_ALIGN(size); diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c deleted file mode 100644 index 7e14965..0000000 --- a/drivers/staging/rdma/hfi1/mr.c +++ /dev/null @@ -1,473 +0,0 @@ -/* - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include -#include - -#include "hfi.h" - -/* Fast memory region */ -struct hfi1_fmr { - struct ib_fmr ibfmr; - struct rvt_mregion mr; /* must be last */ -}; - -static inline struct hfi1_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct hfi1_fmr, ibfmr); -} - -static int init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, - int count) -{ - int m, i = 0; - int rval = 0; - - m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; - for (; i < m; i++) { - mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); - if (!mr->map[i]) - goto bail; - } - mr->mapsz = m; - init_completion(&mr->comp); - /* count returning the ptr to user */ - atomic_set(&mr->refcount, 1); - mr->pd = pd; - mr->max_segs = count; -out: - return rval; -bail: - while (i) - kfree(mr->map[--i]); - rval = -ENOMEM; - goto out; -} - -static void deinit_mregion(struct rvt_mregion *mr) -{ - int i = mr->mapsz; - - mr->mapsz = 0; - while (i) - kfree(mr->map[--i]); -} - - -/** - * hfi1_get_dma_mr - get a DMA memory region - * @pd: protection domain for this memory region - * @acc: access flags - * - * Returns the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the - * struct ib_dma_mapping_ops functions (see dma.c). 
- */ -struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc) -{ - struct hfi1_mr *mr = NULL; - struct ib_mr *ret; - int rval; - - if (ibpd_to_rvtpd(pd)->user) { - ret = ERR_PTR(-EPERM); - goto bail; - } - - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - rval = init_mregion(&mr->mr, pd, 0); - if (rval) { - ret = ERR_PTR(rval); - goto bail; - } - - - rval = hfi1_alloc_lkey(&mr->mr, 1); - if (rval) { - ret = ERR_PTR(rval); - goto bail_mregion; - } - - mr->mr.access_flags = acc; - ret = &mr->ibmr; -done: - return ret; - -bail_mregion: - deinit_mregion(&mr->mr); -bail: - kfree(mr); - goto done; -} - -static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd) -{ - struct hfi1_mr *mr; - int rval = -ENOMEM; - int m; - - /* Allocate struct plus pointers to first level page tables. */ - m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; - mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL); - if (!mr) - goto bail; - - rval = init_mregion(&mr->mr, pd, count); - if (rval) - goto bail; - - rval = hfi1_alloc_lkey(&mr->mr, 0); - if (rval) - goto bail_mregion; - mr->ibmr.lkey = mr->mr.lkey; - mr->ibmr.rkey = mr->mr.lkey; -done: - return mr; - -bail_mregion: - deinit_mregion(&mr->mr); -bail: - kfree(mr); - mr = ERR_PTR(rval); - goto done; -} - -/** - * hfi1_reg_user_mr - register a userspace memory region - * @pd: protection domain for this memory region - * @start: starting userspace address - * @length: length of region to register - * @mr_access_flags: access flags for this memory region - * @udata: unused by the driver - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata) -{ - struct hfi1_mr *mr; - struct ib_umem *umem; - struct scatterlist *sg; - int n, m, entry; - struct ib_mr *ret; - - if (length == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); - if (IS_ERR(umem)) - return (void *) umem; - - n = umem->nmap; - - mr = alloc_mr(n, pd); - if (IS_ERR(mr)) { - ret = (struct ib_mr *)mr; - ib_umem_release(umem); - goto bail; - } - - mr->mr.user_base = start; - mr->mr.iova = virt_addr; - mr->mr.length = length; - mr->mr.offset = ib_umem_offset(umem); - mr->mr.access_flags = mr_access_flags; - mr->umem = umem; - - if (is_power_of_2(umem->page_size)) - mr->mr.page_shift = ilog2(umem->page_size); - m = 0; - n = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - void *vaddr; - - vaddr = page_address(sg_page(sg)); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == RVT_SEGSZ) { - m++; - n = 0; - } - } - ret = &mr->ibmr; - -bail: - return ret; -} - -/** - * hfi1_dereg_mr - unregister and free a memory region - * @ibmr: the memory region to free - * - * Returns 0 on success. - * - * Note that this is called to free MRs created by hfi1_get_dma_mr() - * or hfi1_reg_user_mr(). 
- */ -int hfi1_dereg_mr(struct ib_mr *ibmr) -{ - struct hfi1_mr *mr = to_imr(ibmr); - int ret = 0; - unsigned long timeout; - - hfi1_free_lkey(&mr->mr); - - hfi1_put_mr(&mr->mr); /* will set completion if last */ - timeout = wait_for_completion_timeout(&mr->mr.comp, - 5 * HZ); - if (!timeout) { - dd_dev_err( - dd_from_ibdev(mr->mr.pd->device), - "hfi1_dereg_mr timeout mr %p pd %p refcount %u\n", - mr, mr->mr.pd, atomic_read(&mr->mr.refcount)); - hfi1_get_mr(&mr->mr); - ret = -EBUSY; - goto out; - } - deinit_mregion(&mr->mr); - if (mr->umem) - ib_umem_release(mr->umem); - kfree(mr); -out: - return ret; -} - -/* - * Allocate a memory region usable with the - * IB_WR_REG_MR send work request. - * - * Return the memory region on success, otherwise return an errno. - * FIXME: IB_WR_REG_MR is not supported - */ -struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) -{ - struct hfi1_mr *mr; - - if (mr_type != IB_MR_TYPE_MEM_REG) - return ERR_PTR(-EINVAL); - - mr = alloc_mr(max_num_sg, pd); - if (IS_ERR(mr)) - return (struct ib_mr *)mr; - - return &mr->ibmr; -} - -/** - * hfi1_alloc_fmr - allocate a fast memory region - * @pd: the protection domain for this memory region - * @mr_access_flags: access flags for this memory region - * @fmr_attr: fast memory region attributes - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct hfi1_fmr *fmr; - int m; - struct ib_fmr *ret; - int rval = -ENOMEM; - - /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ; - fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL); - if (!fmr) - goto bail; - - rval = init_mregion(&fmr->mr, pd, fmr_attr->max_pages); - if (rval) - goto bail; - - /* - * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & - * rkey. - */ - rval = hfi1_alloc_lkey(&fmr->mr, 0); - if (rval) - goto bail_mregion; - fmr->ibfmr.rkey = fmr->mr.lkey; - fmr->ibfmr.lkey = fmr->mr.lkey; - /* - * Resources are allocated but no valid mapping (RKEY can't be - * used). - */ - fmr->mr.access_flags = mr_access_flags; - fmr->mr.max_segs = fmr_attr->max_pages; - fmr->mr.page_shift = fmr_attr->page_shift; - - ret = &fmr->ibfmr; -done: - return ret; - -bail_mregion: - deinit_mregion(&fmr->mr); -bail: - kfree(fmr); - ret = ERR_PTR(rval); - goto done; -} - -/** - * hfi1_map_phys_fmr - set up a fast memory region - * @ibmfr: the fast memory region to set up - * @page_list: the list of pages to associate with the fast memory region - * @list_len: the number of pages to associate with the fast memory region - * @iova: the virtual address of the start of the fast memory region - * - * This may be called from interrupt context. 
- */ - -int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova) -{ - struct hfi1_fmr *fmr = to_ifmr(ibfmr); - struct rvt_lkey_table *rkt; - unsigned long flags; - int m, n, i; - u32 ps; - int ret; - - i = atomic_read(&fmr->mr.refcount); - if (i > 2) - return -EBUSY; - - if (list_len > fmr->mr.max_segs) { - ret = -EINVAL; - goto bail; - } - rkt = &to_idev(ibfmr->device)->lk_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = iova; - fmr->mr.iova = iova; - ps = 1 << fmr->mr.page_shift; - fmr->mr.length = list_len * ps; - m = 0; - n = 0; - for (i = 0; i < list_len; i++) { - fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; - fmr->mr.map[m]->segs[n].length = ps; - if (++n == RVT_SEGSZ) { - m++; - n = 0; - } - } - spin_unlock_irqrestore(&rkt->lock, flags); - ret = 0; - -bail: - return ret; -} - -/** - * hfi1_unmap_fmr - unmap fast memory regions - * @fmr_list: the list of fast memory regions to unmap - * - * Returns 0 on success. - */ -int hfi1_unmap_fmr(struct list_head *fmr_list) -{ - struct hfi1_fmr *fmr; - struct rvt_lkey_table *rkt; - unsigned long flags; - - list_for_each_entry(fmr, fmr_list, ibfmr.list) { - rkt = &to_idev(fmr->ibfmr.device)->lk_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - spin_unlock_irqrestore(&rkt->lock, flags); - } - return 0; -} - -/** - * hfi1_dealloc_fmr - deallocate a fast memory region - * @ibfmr: the fast memory region to deallocate - * - * Returns 0 on success. - */ -int hfi1_dealloc_fmr(struct ib_fmr *ibfmr) -{ - struct hfi1_fmr *fmr = to_ifmr(ibfmr); - int ret = 0; - unsigned long timeout; - - hfi1_free_lkey(&fmr->mr); - hfi1_put_mr(&fmr->mr); /* will set completion if last */ - timeout = wait_for_completion_timeout(&fmr->mr.comp, - 5 * HZ); - if (!timeout) { - hfi1_get_mr(&fmr->mr); - ret = -EBUSY; - goto out; - } - deinit_mregion(&fmr->mr); - kfree(fmr); -out: - return ret; -} diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 25d65f9..8ee7ed8 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -1526,8 +1526,8 @@ static void sc_piobufavail(struct send_context *sc) struct hfi1_devdata *dd = sc->dd; struct hfi1_ibdev *dev = &dd->verbs_dev; struct list_head *list; - struct hfi1_qp *qps[PIO_WAIT_BATCH_SIZE]; - struct hfi1_qp *qp; + struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; + struct rvt_qp *qp; struct hfi1_qp_priv *priv; unsigned long flags; unsigned i, n = 0; diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 9fcf052..5a68455 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -67,7 +67,7 @@ static unsigned int hfi1_qp_table_size = 256; module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); -static void flush_tx_list(struct hfi1_qp *qp); +static void flush_tx_list(struct rvt_qp *qp); static int iowait_sleep( struct sdma_engine *sde, struct iowait *wait, @@ -229,7 +229,7 @@ static void free_qpn(struct hfi1_qpn_table *qpt, u32 qpn) * Put the QP into the hash table. * The hash table holds a reference to the QP. 
*/ -static void insert_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp) +static void insert_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); unsigned long flags; @@ -254,7 +254,7 @@ static void insert_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp) * Remove the QP from the table so it can't be found asynchronously by * the receive interrupt routine. */ -static void remove_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp) +static void remove_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num); @@ -270,8 +270,8 @@ static void remove_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp) lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) { RCU_INIT_POINTER(ibp->qp[1], NULL); } else { - struct hfi1_qp *q; - struct hfi1_qp __rcu **qpp; + struct rvt_qp *q; + struct rvt_qp __rcu **qpp; removed = 0; qpp = &dev->qp_dev->qp_table[n]; @@ -308,7 +308,7 @@ static unsigned free_all_qps(struct hfi1_devdata *dd) { struct hfi1_ibdev *dev = &dd->verbs_dev; unsigned long flags; - struct hfi1_qp *qp; + struct rvt_qp *qp; unsigned n, qp_inuse = 0; for (n = 0; n < dd->num_pports; n++) { @@ -347,7 +347,7 @@ bail: * @qp: the QP to reset * @type: the QP type */ -static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type) +static void reset_qp(struct rvt_qp *qp, enum ib_qp_type type) { struct hfi1_qp_priv *priv = qp->priv; qp->remote_qpn = 0; @@ -402,7 +402,7 @@ static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type) qp->r_sge.num_sge = 0; } -static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) +static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; @@ -413,13 +413,13 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) if (clr_sends) { while (qp->s_last != qp->s_head) { - struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + struct rvt_swqe *wqe = get_swqe_ptr(qp, qp->s_last); unsigned i; for (i = 0; i < wqe->wr.num_sge; i++) { - struct hfi1_sge *sge = &wqe->sg_list[i]; + struct rvt_sge *sge = &wqe->sg_list[i]; - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || @@ -429,7 +429,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) qp->s_last = 0; } if (qp->s_rdma_mr) { - hfi1_put_mr(qp->s_rdma_mr); + rvt_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } } @@ -438,11 +438,11 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) return; for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { - struct hfi1_ack_entry *e = &qp->s_ack_queue[n]; + struct rvt_ack_entry *e = &qp->s_ack_queue[n]; if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && e->rdma_sge.mr) { - hfi1_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } } @@ -458,7 +458,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) * The QP r_lock and s_lock should be held and interrupts disabled. * If we are already in error state, just return. 
*/ -int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err) +int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err) { struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; @@ -490,7 +490,7 @@ int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err) if (!(qp->s_flags & HFI1_S_BUSY)) { qp->s_hdrwords = 0; if (qp->s_rdma_mr) { - hfi1_put_mr(qp->s_rdma_mr); + rvt_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } flush_tx_list(qp); @@ -514,7 +514,7 @@ int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err) wc.status = IB_WC_WR_FLUSH_ERR; if (qp->r_rq.wq) { - struct hfi1_rwq *wq; + struct rvt_rwq *wq; u32 head; u32 tail; @@ -544,7 +544,7 @@ bail: return ret; } -static void flush_tx_list(struct hfi1_qp *qp) +static void flush_tx_list(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -561,7 +561,7 @@ static void flush_tx_list(struct hfi1_qp *qp) } } -static void flush_iowait(struct hfi1_qp *qp) +static void flush_iowait(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); @@ -616,7 +616,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct hfi1_ibdev *dev = to_idev(ibqp->device); - struct hfi1_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct hfi1_qp_priv *priv = qp->priv; enum ib_qp_state cur_state, new_state; struct ib_event ev; @@ -915,7 +915,7 @@ bail: int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { - struct hfi1_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); attr->qp_state = qp->state; attr->cur_qp_state = attr->qp_state; @@ -968,7 +968,7 @@ int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * * Returns the AETH. 
*/ -__be32 hfi1_compute_aeth(struct hfi1_qp *qp) +__be32 hfi1_compute_aeth(struct rvt_qp *qp) { u32 aeth = qp->r_msn & HFI1_MSN_MASK; @@ -981,7 +981,7 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp) } else { u32 min, max, x; u32 credits; - struct hfi1_rwq *wq = qp->r_rq.wq; + struct rvt_rwq *wq = qp->r_rq.wq; u32 head; u32 tail; @@ -1037,10 +1037,10 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct hfi1_qp *qp; + struct rvt_qp *qp; struct hfi1_qp_priv *priv; int err; - struct hfi1_swqe *swq = NULL; + struct rvt_swqe *swq = NULL; struct hfi1_ibdev *dev; struct hfi1_devdata *dd; size_t sz; @@ -1081,9 +1081,9 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: - sz = sizeof(struct hfi1_sge) * + sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + - sizeof(struct hfi1_swqe); + sizeof(struct rvt_swqe); swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); if (swq == NULL) { ret = ERR_PTR(-ENOMEM); @@ -1127,8 +1127,8 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + - sizeof(struct hfi1_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) + + sizeof(struct rvt_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + qp->r_rq.size * sz); if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); @@ -1192,7 +1192,7 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, goto bail_ip; } } else { - u32 s = sizeof(struct hfi1_rwq) + qp->r_rq.size * sz; + u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; qp->ip = hfi1_create_mmap_info(dev, s, ibpd->uobject->context, @@ -1281,7 +1281,7 @@ bail: */ int hfi1_destroy_qp(struct ib_qp *ibqp) { - struct hfi1_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct hfi1_ibdev *dev = to_idev(ibqp->device); struct hfi1_qp_priv *priv = qp->priv; @@ -1387,7 +1387,7 @@ static void free_qpn_table(struct hfi1_qpn_table *qpt) * * The QP s_lock should be held. */ -void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth) +void hfi1_get_credit(struct rvt_qp *qp, u32 aeth) { u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK; @@ -1417,7 +1417,7 @@ void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth) } } -void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag) +void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) { unsigned long flags; @@ -1440,7 +1440,7 @@ static int iowait_sleep( unsigned seq) { struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq); - struct hfi1_qp *qp; + struct rvt_qp *qp; struct hfi1_qp_priv *priv; unsigned long flags; int ret = 0; @@ -1491,7 +1491,7 @@ eagain: static void iowait_wakeup(struct iowait *wait, int reason) { - struct hfi1_qp *qp = iowait_to_qp(wait); + struct rvt_qp *qp = iowait_to_qp(wait); WARN_ON(reason != SDMA_AVAIL_REASON); hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC); @@ -1558,7 +1558,7 @@ void hfi1_qp_exit(struct hfi1_ibdev *dev) * Return: * A send engine for the qp or NULL for SMI type qp. 
*/ -struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5) +struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct sdma_engine *sde; @@ -1577,7 +1577,7 @@ struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5) struct qp_iter { struct hfi1_ibdev *dev; - struct hfi1_qp *qp; + struct rvt_qp *qp; int specials; int n; }; @@ -1605,8 +1605,8 @@ int qp_iter_next(struct qp_iter *iter) struct hfi1_ibdev *dev = iter->dev; int n = iter->n; int ret = 1; - struct hfi1_qp *pqp = iter->qp; - struct hfi1_qp *qp; + struct rvt_qp *pqp = iter->qp; + struct rvt_qp *qp; /* * The approach is to consider the special qps @@ -1659,7 +1659,7 @@ static const char * const qp_type_str[] = { "SMI", "GSI", "RC", "UC", "UD", }; -static int qp_idle(struct hfi1_qp *qp) +static int qp_idle(struct rvt_qp *qp) { return qp->s_last == qp->s_acked && @@ -1670,8 +1670,8 @@ static int qp_idle(struct hfi1_qp *qp) void qp_iter_print(struct seq_file *s, struct qp_iter *iter) { - struct hfi1_swqe *wqe; - struct hfi1_qp *qp = iter->qp; + struct rvt_swqe *wqe; + struct rvt_qp *qp = iter->qp; struct hfi1_qp_priv *priv = qp->priv; struct sdma_engine *sde; @@ -1709,7 +1709,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) sde ? sde->this_idx : 0); } -void qp_comm_est(struct hfi1_qp *qp) +void qp_comm_est(struct rvt_qp *qp) { qp->r_flags |= HFI1_R_COMM_EST; if (qp->ibqp.event_handler) { @@ -1726,7 +1726,7 @@ void qp_comm_est(struct hfi1_qp *qp) * Switch to alternate path. * The QP s_lock should be held and interrupts disabled. */ -void hfi1_migrate_qp(struct hfi1_qp *qp) +void hfi1_migrate_qp(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct ib_event ev; diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 474c838..3dd31e9 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -80,7 +80,7 @@ struct hfi1_qpn_table { struct hfi1_qp_ibdev { u32 qp_table_size; u32 qp_table_bits; - struct hfi1_qp __rcu **qp_table; + struct rvt_qp __rcu **qp_table; spinlock_t qpt_lock; struct hfi1_qpn_table qpn_table; }; @@ -98,10 +98,10 @@ static inline u32 qpn_hash(struct hfi1_qp_ibdev *dev, u32 qpn) * The caller must hold the rcu_read_lock(), and keep the lock until * the returned qp is no longer in use. */ -static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, - u32 qpn) __must_hold(RCU) +static inline struct rvt_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, + u32 qpn) __must_hold(RCU) { - struct hfi1_qp *qp = NULL; + struct rvt_qp *qp = NULL; if (unlikely(qpn <= 1)) { qp = rcu_dereference(ibp->qp[qpn]); @@ -117,11 +117,10 @@ static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, return qp; } -/** - * clear_ahg - reset ahg status in qp - * @qp - qp pointer +/* + * free_ahg - clear ahg from QP */ -static inline void clear_ahg(struct hfi1_qp *qp) +static inline void clear_ahg(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -142,7 +141,7 @@ static inline void clear_ahg(struct hfi1_qp *qp) * The QP r_lock and s_lock should be held and interrupts disabled. * If we are already in error state, just return. */ -int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err); +int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err); /** * hfi1_modify_qp - modify the attributes of a queue pair @@ -165,7 +164,7 @@ int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * * Returns the AETH. 
*/ -__be32 hfi1_compute_aeth(struct hfi1_qp *qp); +__be32 hfi1_compute_aeth(struct rvt_qp *qp); /** * hfi1_create_qp - create a queue pair for a device @@ -198,7 +197,7 @@ int hfi1_destroy_qp(struct ib_qp *ibqp); * * The QP s_lock should be held. */ -void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth); +void hfi1_get_credit(struct rvt_qp *qp, u32 aeth); /** * hfi1_qp_init - allocate QP tables @@ -217,9 +216,9 @@ void hfi1_qp_exit(struct hfi1_ibdev *dev); * @qp: the QP * @flag: flag the qp on which the qp is stalled */ -void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag); +void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag); -struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5); +struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5); struct qp_iter; @@ -246,7 +245,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter); * qp_comm_est - handle trap with QP established * @qp: the QP */ -void qp_comm_est(struct hfi1_qp *qp); +void qp_comm_est(struct rvt_qp *qp); /** * _hfi1_schedule_send - schedule progress @@ -257,7 +256,7 @@ void qp_comm_est(struct hfi1_qp *qp); * It is only used in the post send, which doesn't hold * the s_lock. */ -static inline void _hfi1_schedule_send(struct hfi1_qp *qp) +static inline void _hfi1_schedule_send(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = @@ -278,12 +277,12 @@ static inline void _hfi1_schedule_send(struct hfi1_qp *qp) * This schedules qp progress and caller should hold * the s_lock. */ -static inline void hfi1_schedule_send(struct hfi1_qp *qp) +static inline void hfi1_schedule_send(struct rvt_qp *qp) { if (hfi1_send_ok(qp)) _hfi1_schedule_send(qp); } -void hfi1_migrate_qp(struct hfi1_qp *qp); +void hfi1_migrate_qp(struct rvt_qp *qp); #endif /* _QP_H */ diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 70d5bd1..4b8518a 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -60,7 +60,7 @@ static void rc_timeout(unsigned long arg); -static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe, +static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { u32 len; @@ -74,7 +74,7 @@ static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe, return wqe->length - len; } -static void start_timer(struct hfi1_qp *qp) +static void start_timer(struct rvt_qp *qp) { qp->s_flags |= HFI1_S_TIMER; qp->s_timer.function = rc_timeout; @@ -94,10 +94,10 @@ static void start_timer(struct hfi1_qp *qp) * Note that we are in the responder's side of the QP context. * Note the QP s_lock must be held. 
*/ -static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp, +static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, struct hfi1_other_headers *ohdr, u32 pmtu) { - struct hfi1_ack_entry *e; + struct rvt_ack_entry *e; u32 hwords; u32 len; u32 bth0; @@ -116,7 +116,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; if (e->rdma_sge.mr) { - hfi1_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } /* FALLTHROUGH */ @@ -154,7 +154,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp, /* Copy SGE state in case we need to resend */ qp->s_rdma_mr = e->rdma_sge.mr; if (qp->s_rdma_mr) - hfi1_get_mr(qp->s_rdma_mr); + rvt_get_mr(qp->s_rdma_mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; qp->s_cur_sge = &qp->s_ack_rdma_sge; @@ -193,7 +193,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp, qp->s_cur_sge = &qp->s_ack_rdma_sge; qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; if (qp->s_rdma_mr) - hfi1_get_mr(qp->s_rdma_mr); + rvt_get_mr(qp->s_rdma_mr); len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) { len = pmtu; @@ -257,13 +257,13 @@ bail: * * Return 1 if constructed; otherwise, return 0. */ -int hfi1_make_rc_req(struct hfi1_qp *qp) +int hfi1_make_rc_req(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); struct hfi1_other_headers *ohdr; - struct hfi1_sge_state *ss; - struct hfi1_swqe *wqe; + struct rvt_sge_state *ss; + struct rvt_swqe *wqe; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ u32 hwords = 5; u32 len; @@ -683,7 +683,7 @@ unlock: * Note that RDMA reads and atomics are handled in the * send side QP state and tasklet. */ -void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp, +void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, int is_fecn) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); @@ -794,10 +794,10 @@ queue_ack: * for the given QP. * Called at interrupt level with the QP s_lock held. */ -static void reset_psn(struct hfi1_qp *qp, u32 psn) +static void reset_psn(struct rvt_qp *qp, u32 psn) { u32 n = qp->s_acked; - struct hfi1_swqe *wqe = get_swqe_ptr(qp, n); + struct rvt_swqe *wqe = get_swqe_ptr(qp, n); u32 opcode; qp->s_cur = n; @@ -880,9 +880,9 @@ done: * Back up requester to resend the last un-ACKed request. * The QP r_lock and s_lock should be held and interrupts disabled. */ -static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait) +static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) { - struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); + struct rvt_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); struct hfi1_ibport *ibp; if (qp->s_retry == 0) { @@ -917,7 +917,7 @@ static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait) */ static void rc_timeout(unsigned long arg) { - struct hfi1_qp *qp = (struct hfi1_qp *)arg; + struct rvt_qp *qp = (struct rvt_qp *)arg; struct hfi1_ibport *ibp; unsigned long flags; @@ -941,7 +941,7 @@ static void rc_timeout(unsigned long arg) */ void hfi1_rc_rnr_retry(unsigned long arg) { - struct hfi1_qp *qp = (struct hfi1_qp *)arg; + struct rvt_qp *qp = (struct rvt_qp *)arg; unsigned long flags; spin_lock_irqsave(&qp->s_lock, flags); @@ -957,9 +957,9 @@ void hfi1_rc_rnr_retry(unsigned long arg) * Set qp->s_sending_psn to the next PSN after the given one. * This would be psn+1 except when RDMA reads are present. 
*/ -static void reset_sending_psn(struct hfi1_qp *qp, u32 psn) +static void reset_sending_psn(struct rvt_qp *qp, u32 psn) { - struct hfi1_swqe *wqe; + struct rvt_swqe *wqe; u32 n = qp->s_last; /* Find the work request corresponding to the given PSN. */ @@ -982,10 +982,10 @@ static void reset_sending_psn(struct hfi1_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr) +void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) { struct hfi1_other_headers *ohdr; - struct hfi1_swqe *wqe; + struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; u32 opcode; @@ -1027,9 +1027,9 @@ void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr) cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; for (i = 0; i < wqe->wr.num_sge; i++) { - struct hfi1_sge *sge = &wqe->sg_list[i]; + struct rvt_sge *sge = &wqe->sg_list[i]; - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || @@ -1059,7 +1059,7 @@ void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr) } } -static inline void update_last_psn(struct hfi1_qp *qp, u32 psn) +static inline void update_last_psn(struct rvt_qp *qp, u32 psn) { qp->s_last_psn = psn; } @@ -1069,9 +1069,9 @@ static inline void update_last_psn(struct hfi1_qp *qp, u32 psn) * This is similar to hfi1_send_complete but has to check to be sure * that the SGEs are not being referenced if the SWQE is being resent. */ -static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp, - struct hfi1_swqe *wqe, - struct hfi1_ibport *ibp) +static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, + struct rvt_swqe *wqe, + struct hfi1_ibport *ibp) { struct ib_wc wc; unsigned i; @@ -1084,9 +1084,9 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp, if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 || cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { for (i = 0; i < wqe->wr.num_sge; i++) { - struct hfi1_sge *sge = &wqe->sg_list[i]; + struct rvt_sge *sge = &wqe->sg_list[i]; - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || @@ -1158,12 +1158,12 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp, * May be called at interrupt level, with the QP s_lock held. * Returns 1 if OK, 0 if current operation should be aborted (NAK). */ -static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode, +static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val, struct hfi1_ctxtdata *rcd) { struct hfi1_ibport *ibp; enum ib_wc_status status; - struct hfi1_swqe *wqe; + struct rvt_swqe *wqe; int ret = 0; u32 ack_psn; int diff; @@ -1381,10 +1381,10 @@ bail: * We have seen an out of sequence RDMA read middle or last packet. * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE. 
*/ -static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn, +static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, struct hfi1_ctxtdata *rcd) { - struct hfi1_swqe *wqe; + struct rvt_swqe *wqe; /* Remove QP from retry timer */ if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) { @@ -1430,11 +1430,11 @@ static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn, */ static void rc_rcv_resp(struct hfi1_ibport *ibp, struct hfi1_other_headers *ohdr, - void *data, u32 tlen, struct hfi1_qp *qp, + void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, struct hfi1_ctxtdata *rcd) { - struct hfi1_swqe *wqe; + struct rvt_swqe *wqe; enum ib_wc_status status; unsigned long flags; int diff; @@ -1610,7 +1610,7 @@ bail: } static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, - struct hfi1_qp *qp) + struct rvt_qp *qp) { if (list_empty(&qp->rspwait)) { qp->r_flags |= HFI1_R_RSP_DEFERED_ACK; @@ -1619,7 +1619,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, } } -static inline void rc_cancel_ack(struct hfi1_qp *qp) +static inline void rc_cancel_ack(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -1648,11 +1648,11 @@ static inline void rc_cancel_ack(struct hfi1_qp *qp) * schedule a response to be sent. */ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, - struct hfi1_qp *qp, u32 opcode, u32 psn, int diff, + struct rvt_qp *qp, u32 opcode, u32 psn, int diff, struct hfi1_ctxtdata *rcd) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_ack_entry *e; + struct rvt_ack_entry *e; unsigned long flags; u8 i, prev; int old_req; @@ -1750,7 +1750,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; if (e->rdma_sge.mr) { - hfi1_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } if (len != 0) { @@ -1758,8 +1758,8 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, u64 vaddr = be64_to_cpu(reth->vaddr); int ok; - ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, - IB_ACCESS_REMOTE_READ); + ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, + IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) goto unlock_done; } else { @@ -1826,7 +1826,7 @@ send_ack: return 0; } -void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err) +void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err) { unsigned long flags; int lastwqe; @@ -1845,7 +1845,7 @@ void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err) } } -static inline void update_ack_queue(struct hfi1_qp *qp, unsigned n) +static inline void update_ack_queue(struct rvt_qp *qp, unsigned n) { unsigned next; @@ -1960,7 +1960,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; - struct hfi1_qp *qp = packet->qp; + struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_other_headers *ohdr = packet->ohdr; @@ -2177,8 +2177,8 @@ send_last: int ok; /* Check rkey & NAK */ - ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, - rkey, IB_ACCESS_REMOTE_WRITE); + ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, + rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) goto nack_acc; qp->r_sge.num_sge = 1; @@ -2203,7 +2203,7 @@ send_last: goto send_last; 
case OP(RDMA_READ_REQUEST): { - struct hfi1_ack_entry *e; + struct rvt_ack_entry *e; u32 len; u8 next; @@ -2221,7 +2221,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - hfi1_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } reth = &ohdr->u.rc.reth; @@ -2232,8 +2232,8 @@ send_last: int ok; /* Check rkey & NAK */ - ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr, - rkey, IB_ACCESS_REMOTE_READ); + ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, + rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) goto nack_acc_unlck; /* @@ -2276,7 +2276,7 @@ send_last: case OP(COMPARE_SWAP): case OP(FETCH_ADD): { struct ib_atomic_eth *ateth; - struct hfi1_ack_entry *e; + struct rvt_ack_entry *e; u64 vaddr; atomic64_t *maddr; u64 sdata; @@ -2296,7 +2296,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - hfi1_put_mr(e->rdma_sge.mr); + rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; @@ -2306,9 +2306,9 @@ send_last: goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ - if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), - vaddr, rkey, - IB_ACCESS_REMOTE_ATOMIC))) + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), + vaddr, rkey, + IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; @@ -2318,7 +2318,7 @@ send_last: (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, be64_to_cpu(ateth->compare_data), sdata); - hfi1_put_mr(qp->r_sge.sge.mr); + rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; e->opcode = opcode; e->sent = 0; @@ -2408,7 +2408,7 @@ void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, struct hfi1_ib_header *hdr, u32 rcv_flags, - struct hfi1_qp *qp) + struct rvt_qp *qp) { int has_grh = rcv_flags & HFI1_HAS_GRH; struct hfi1_other_headers *ohdr; diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 33bcfe5..762fca9 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -97,15 +97,15 @@ const u32 ib_hfi1_rnr_table[32] = { * Validate a RWQE and fill in the SGE state. * Return 1 if OK. */ -static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe) +static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) { int i, j, ret; struct ib_wc wc; struct rvt_lkey_table *rkt; struct rvt_pd *pd; - struct hfi1_sge_state *ss; + struct rvt_sge_state *ss; - rkt = &to_idev(qp->ibqp.device)->lk_table; + rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); ss = &qp->r_sge; ss->sg_list = qp->r_sg_list; @@ -114,8 +114,8 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe) if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ - if (!hfi1_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, + &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; @@ -127,9 +127,9 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe) bad_lkey: while (j) { - struct hfi1_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; + struct rvt_sge *sge = --j ? 
&ss->sg_list[j - 1] : &ss->sge; - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); } ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); @@ -154,13 +154,13 @@ bail: * * Can be called from interrupt level. */ -int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only) +int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only) { unsigned long flags; - struct hfi1_rq *rq; - struct hfi1_rwq *wq; + struct rvt_rq *rq; + struct rvt_rwq *wq; struct hfi1_srq *srq; - struct hfi1_rwqe *wqe; + struct rvt_rwqe *wqe; void (*handler)(struct ib_event *, void *); u32 tail; int ret; @@ -265,7 +265,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * The s_lock will be acquired around the hfi1_migrate_qp() call. */ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, - int has_grh, struct hfi1_qp *qp, u32 bth0) + int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; unsigned long flags; @@ -355,12 +355,12 @@ err: * receive interrupts since this is a connected protocol and all packets * will pass through here. */ -static void ruc_loopback(struct hfi1_qp *sqp) +static void ruc_loopback(struct rvt_qp *sqp) { struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); - struct hfi1_qp *qp; - struct hfi1_swqe *wqe; - struct hfi1_sge *sge; + struct rvt_qp *qp; + struct rvt_swqe *wqe; + struct rvt_sge *sge; unsigned long flags; struct ib_wc wc; u64 sdata; @@ -461,11 +461,10 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) goto inv_err; if (wqe->length == 0) - break; - if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length, - wqe->rdma_wr.remote_addr, - wqe->rdma_wr.rkey, - IB_ACCESS_REMOTE_WRITE))) + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, + IB_ACCESS_REMOTE_WRITE))) goto acc_err; qp->r_sge.sg_list = NULL; qp->r_sge.num_sge = 1; @@ -475,10 +474,10 @@ again: case IB_WR_RDMA_READ: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; - if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, - wqe->rdma_wr.remote_addr, - wqe->rdma_wr.rkey, - IB_ACCESS_REMOTE_READ))) + if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, + IB_ACCESS_REMOTE_READ))) goto acc_err; release = 0; sqp->s_sge.sg_list = NULL; @@ -493,10 +492,10 @@ again: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; - if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), - wqe->atomic_wr.remote_addr, - wqe->atomic_wr.rkey, - IB_ACCESS_REMOTE_ATOMIC))) + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), + wqe->atomic_wr.remote_addr, + wqe->atomic_wr.rkey, + IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; @@ -506,7 +505,7 @@ again: (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, sdata, wqe->atomic_wr.swap); - hfi1_put_mr(qp->r_sge.sge.mr); + rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -530,7 +529,7 @@ again: sge->sge_length -= len; if (sge->sge_length == 0) { if (!release) - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -690,7 +689,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, * Subsequent middles use the copied entry, editing the * PSN with 1 or 2 edits. 
*/ -static inline void build_ahg(struct hfi1_qp *qp, u32 npsn) +static inline void build_ahg(struct rvt_qp *qp, u32 npsn) { struct hfi1_qp_priv *priv = qp->priv; if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR)) @@ -734,7 +733,7 @@ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn) } } -void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, u32 bth0, u32 bth2, int middle) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); @@ -812,9 +811,9 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, void hfi1_do_send(struct work_struct *work) { struct iowait *wait = container_of(work, struct iowait, iowork); - struct hfi1_qp *qp = iowait_to_qp(wait); + struct rvt_qp *qp = iowait_to_qp(wait); struct hfi1_pkt_state ps; - int (*make_req)(struct hfi1_qp *qp); + int (*make_req)(struct rvt_qp *qp); unsigned long flags; unsigned long timeout; @@ -876,7 +875,7 @@ void hfi1_do_send(struct work_struct *work) /* * This should be called with s_lock held. */ -void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe, +void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; @@ -886,9 +885,9 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe, return; for (i = 0; i < wqe->wr.num_sge; i++) { - struct hfi1_sge *sge = &wqe->sg_list[i]; + struct rvt_sge *sge = &wqe->sg_list[i]; - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index fbd0e41..0f51c45 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -379,10 +379,10 @@ struct sdma_txreq { struct verbs_txreq { struct hfi1_pio_header phdr; struct sdma_txreq txreq; - struct hfi1_qp *qp; - struct hfi1_swqe *wqe; + struct rvt_qp *qp; + struct rvt_swqe *wqe; struct rvt_mregion *mr; - struct hfi1_sge_state *ss; + struct rvt_sge_state *ss; struct sdma_engine *sde; u16 hdr_dwords; u16 hdr_inx; diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/staging/rdma/hfi1/srq.c index 67786d4..932bd96 100644 --- a/drivers/staging/rdma/hfi1/srq.c +++ b/drivers/staging/rdma/hfi1/srq.c @@ -66,12 +66,12 @@ int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct hfi1_srq *srq = to_isrq(ibsrq); - struct hfi1_rwq *wq; + struct rvt_rwq *wq; unsigned long flags; int ret; for (; wr; wr = wr->next) { - struct hfi1_rwqe *wqe; + struct rvt_rwqe *wqe; u32 next; int i; @@ -149,8 +149,8 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd, srq->rq.size = srq_init_attr->attr.max_wr + 1; srq->rq.max_sge = srq_init_attr->attr.max_sge; sz = sizeof(struct ib_sge) * srq->rq.max_sge + - sizeof(struct hfi1_rwqe); - srq->rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) + srq->rq.size * sz); + sizeof(struct rvt_rwqe); + srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz); if (!srq->rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_srq; @@ -162,7 +162,7 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd, */ if (udata && udata->outlen >= sizeof(__u64)) { int err; - u32 s = sizeof(struct hfi1_rwq) + srq->rq.size * sz; + u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; srq->ip = hfi1_create_mmap_info(dev, s, ibpd->uobject->context, @@ -230,12 +230,12 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct ib_udata 
*udata) { struct hfi1_srq *srq = to_isrq(ibsrq); - struct hfi1_rwq *wq; + struct rvt_rwq *wq; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { - struct hfi1_rwq *owq; - struct hfi1_rwqe *p; + struct rvt_rwq *owq; + struct rvt_rwqe *p; u32 sz, size, n, head, tail; /* Check that the requested sizes are below the limits. */ @@ -246,10 +246,10 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail; } - sz = sizeof(struct hfi1_rwqe) + + sz = sizeof(struct rvt_rwqe) + srq->rq.max_sge * sizeof(struct ib_sge); size = attr->max_wr + 1; - wq = vmalloc_user(sizeof(struct hfi1_rwq) + size * sz); + wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz); if (!wq) { ret = -ENOMEM; goto bail; @@ -296,7 +296,7 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, n = 0; p = wq->wq; while (tail != head) { - struct hfi1_rwqe *wqe; + struct rvt_rwqe *wqe; int i; wqe = get_rwqe_ptr(&srq->rq, tail); @@ -305,7 +305,7 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, for (i = 0; i < wqe->num_sge; i++) p->sg_list[i] = wqe->sg_list[i]; n++; - p = (struct hfi1_rwqe *)((char *)p + sz); + p = (struct rvt_rwqe *)((char *)p + sz); if (++tail >= srq->rq.size) tail = 0; } @@ -320,9 +320,9 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, vfree(owq); if (srq->ip) { - struct hfi1_mmap_info *ip = srq->ip; + struct rvt_mmap_info *ip = srq->ip; struct hfi1_ibdev *dev = to_idev(srq->ibsrq.device); - u32 s = sizeof(struct hfi1_rwq) + size * sz; + u32 s = sizeof(struct rvt_rwq) + size * sz; hfi1_update_mmap_info(dev, ip, s, wq); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index 1e43567..14601d7 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -332,7 +332,7 @@ TRACE_EVENT(hfi1_wantpiointr, ); DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, - TP_PROTO(struct hfi1_qp *qp, u32 flags), + TP_PROTO(struct rvt_qp *qp, u32 flags), TP_ARGS(qp, flags), TP_STRUCT__entry( DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) @@ -356,17 +356,17 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, ); DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup, - TP_PROTO(struct hfi1_qp *qp, u32 flags), + TP_PROTO(struct rvt_qp *qp, u32 flags), TP_ARGS(qp, flags)); DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, - TP_PROTO(struct hfi1_qp *qp, u32 flags), + TP_PROTO(struct rvt_qp *qp, u32 flags), TP_ARGS(qp, flags)); #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_qphash DECLARE_EVENT_CLASS(hfi1_qphash_template, - TP_PROTO(struct hfi1_qp *qp, u32 bucket), + TP_PROTO(struct rvt_qp *qp, u32 bucket), TP_ARGS(qp, bucket), TP_STRUCT__entry( DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) @@ -387,11 +387,11 @@ DECLARE_EVENT_CLASS(hfi1_qphash_template, ); DEFINE_EVENT(hfi1_qphash_template, hfi1_qpinsert, - TP_PROTO(struct hfi1_qp *qp, u32 bucket), + TP_PROTO(struct rvt_qp *qp, u32 bucket), TP_ARGS(qp, bucket)); DEFINE_EVENT(hfi1_qphash_template, hfi1_qpremove, - TP_PROTO(struct hfi1_qp *qp, u32 bucket), + TP_PROTO(struct rvt_qp *qp, u32 bucket), TP_ARGS(qp, bucket)); #undef TRACE_SYSTEM @@ -1292,7 +1292,7 @@ TRACE_EVENT(hfi1_sdma_state, #define TRACE_SYSTEM hfi1_rc DECLARE_EVENT_CLASS(hfi1_rc_template, - TP_PROTO(struct hfi1_qp *qp, u32 psn), + TP_PROTO(struct rvt_qp *qp, u32 psn), TP_ARGS(qp, psn), TP_STRUCT__entry( DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) @@ -1331,22 +1331,22 @@ DECLARE_EVENT_CLASS(hfi1_rc_template, ); DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete, - TP_PROTO(struct hfi1_qp 
*qp, u32 psn), + TP_PROTO(struct rvt_qp *qp, u32 psn), TP_ARGS(qp, psn) ); DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack, - TP_PROTO(struct hfi1_qp *qp, u32 psn), + TP_PROTO(struct rvt_qp *qp, u32 psn), TP_ARGS(qp, psn) ); DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout, - TP_PROTO(struct hfi1_qp *qp, u32 psn), + TP_PROTO(struct rvt_qp *qp, u32 psn), TP_ARGS(qp, psn) ); DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error, - TP_PROTO(struct hfi1_qp *qp, u32 psn), + TP_PROTO(struct rvt_qp *qp, u32 psn), TP_ARGS(qp, psn) ); diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 1908a28..6686331 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -61,11 +61,11 @@ * * Return 1 if constructed; otherwise, return 0. */ -int hfi1_make_uc_req(struct hfi1_qp *qp) +int hfi1_make_uc_req(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; - struct hfi1_swqe *wqe; + struct rvt_swqe *wqe; unsigned long flags; u32 hwords = 5; u32 bth0 = 0; @@ -267,7 +267,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; - struct hfi1_qp *qp = packet->qp; + struct rvt_qp *qp = packet->qp; struct hfi1_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; @@ -492,8 +492,8 @@ rdma_first: int ok; /* Check rkey */ - ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, - vaddr, rkey, IB_ACCESS_REMOTE_WRITE); + ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, + vaddr, rkey, IB_ACCESS_REMOTE_WRITE); if (unlikely(!ok)) goto drop; qp->r_sge.num_sge = 1; diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 820fef2..d54d56d 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -65,15 +65,15 @@ * Note that the receive interrupt handler may be calling hfi1_ud_rcv() * while this is being called. */ -static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) +static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) { struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); struct hfi1_pportdata *ppd; - struct hfi1_qp *qp; + struct rvt_qp *qp; struct ib_ah_attr *ah_attr; unsigned long flags; - struct hfi1_sge_state ssge; - struct hfi1_sge *sge; + struct rvt_sge_state ssge; + struct rvt_sge *sge; struct ib_wc wc; u32 length; enum ib_qp_type sqptype, dqptype; @@ -262,14 +262,14 @@ drop: * * Return 1 if constructed; otherwise, return 0. */ -int hfi1_make_ud_req(struct hfi1_qp *qp) +int hfi1_make_ud_req(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; - struct hfi1_swqe *wqe; + struct rvt_swqe *wqe; unsigned long flags; u32 nwords; u32 extra_bytes; @@ -477,7 +477,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey) return -1; } -void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn, +void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) { @@ -551,7 +551,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn, * opa_smp_check() returns 0 if all checks succeed, 1 otherwise. 
*/ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, - struct hfi1_qp *qp, u16 slid, struct opa_smp *smp) + struct rvt_qp *qp, u16 slid, struct opa_smp *smp) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -655,7 +655,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; - struct hfi1_qp *qp = packet->qp; + struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; bool sc4_bit = has_sc4_bit(packet); u8 sc; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 657efd3..10d6547 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -65,7 +65,7 @@ #include "qp.h" #include "sdma.h" -unsigned int hfi1_lkey_table_size = 16; +static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, S_IRUGO); MODULE_PARM_DESC(lkey_table_size, @@ -162,7 +162,7 @@ static inline struct hfi1_ucontext *to_iucontext(struct ib_ucontext return container_of(ibucontext, struct hfi1_ucontext, ibucontext); } -static inline void _hfi1_schedule_send(struct hfi1_qp *qp); +static inline void _hfi1_schedule_send(struct rvt_qp *qp); /* * Translate ib_wr_opcode into ib_wc_opcode. @@ -276,11 +276,11 @@ __be64 ib_hfi1_sys_image_guid; * @length: the length of the data */ void hfi1_copy_sge( - struct hfi1_sge_state *ss, + struct rvt_sge_state *ss, void *data, u32 length, int release) { - struct hfi1_sge *sge = &ss->sge; + struct rvt_sge *sge = &ss->sge; while (length) { u32 len = sge->length; @@ -296,7 +296,7 @@ void hfi1_copy_sge( sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -320,9 +320,9 @@ void hfi1_copy_sge( * @ss: the SGE state * @length: the number of bytes to skip */ -void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release) +void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release) { - struct hfi1_sge *sge = &ss->sge; + struct rvt_sge *sge = &ss->sge; while (length) { u32 len = sge->length; @@ -337,7 +337,7 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -360,9 +360,9 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release) * @qp: the QP to post on * @wr: the work request to send */ -static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) +static int post_one_send(struct rvt_qp *qp, struct ib_send_wr *wr) { - struct hfi1_swqe *wqe; + struct rvt_swqe *wqe; u32 next; int i; int j; @@ -412,7 +412,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) if (next == qp->s_last) return -ENOMEM; - rkt = &to_idev(qp->ibqp.device)->lk_table; + rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); @@ -441,8 +441,8 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) if (length == 0) continue; - ok = hfi1_lkey_ok(rkt, pd, &wqe->sg_list[j], - &wr->sg_list[i], acc); + ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], + &wr->sg_list[i], acc); if (!ok) goto bail_inval_free; wqe->length += length; @@ -465,9 +465,9 @@ static int post_one_send(struct hfi1_qp *qp, struct 
ib_send_wr *wr) bail_inval_free: /* release mr holds */ while (j) { - struct hfi1_sge *sge = &wqe->sg_list[--j]; + struct rvt_sge *sge = &wqe->sg_list[--j]; - hfi1_put_mr(sge->mr); + rvt_put_mr(sge->mr); } return -EINVAL; } @@ -483,7 +483,7 @@ bail_inval_free: static int post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { - struct hfi1_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct hfi1_qp_priv *priv = qp->priv; int err = 0; int call_send; @@ -529,8 +529,8 @@ bail: static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - struct hfi1_qp *qp = to_iqp(ibqp); - struct hfi1_rwq *wq = qp->r_rq.wq; + struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_rwq *wq = qp->r_rq.wq; unsigned long flags; int ret; @@ -542,7 +542,7 @@ static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } for (; wr; wr = wr->next) { - struct hfi1_rwqe *wqe; + struct rvt_rwqe *wqe; u32 next; int i; @@ -694,7 +694,7 @@ static void mem_timer(unsigned long data) { struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data; struct list_head *list = &dev->memwait; - struct hfi1_qp *qp = NULL; + struct rvt_qp *qp = NULL; struct iowait *wait; unsigned long flags; struct hfi1_qp_priv *priv; @@ -715,9 +715,9 @@ static void mem_timer(unsigned long data) hfi1_qp_wakeup(qp, HFI1_S_WAIT_KMEM); } -void update_sge(struct hfi1_sge_state *ss, u32 length) +void update_sge(struct rvt_sge_state *ss, u32 length) { - struct hfi1_sge *sge = &ss->sge; + struct rvt_sge *sge = &ss->sge; sge->vaddr += length; sge->length -= length; @@ -737,7 +737,7 @@ void update_sge(struct hfi1_sge_state *ss, u32 length) } static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, - struct hfi1_qp *qp) + struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct verbs_txreq *tx; @@ -764,7 +764,7 @@ static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, } static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, - struct hfi1_qp *qp) + struct rvt_qp *qp) { struct verbs_txreq *tx; @@ -782,7 +782,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, void hfi1_put_txreq(struct verbs_txreq *tx) { struct hfi1_ibdev *dev; - struct hfi1_qp *qp; + struct rvt_qp *qp; unsigned long flags; unsigned int seq; struct hfi1_qp_priv *priv; @@ -791,7 +791,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx) dev = to_idev(qp->ibqp.device); if (tx->mr) { - hfi1_put_mr(tx->mr); + rvt_put_mr(tx->mr); tx->mr = NULL; } sdma_txclean(dd_from_dev(dev), &tx->txreq); @@ -830,7 +830,7 @@ static void verbs_sdma_complete( { struct verbs_txreq *tx = container_of(cookie, struct verbs_txreq, txreq); - struct hfi1_qp *qp = tx->qp; + struct rvt_qp *qp = tx->qp; spin_lock(&qp->s_lock); if (tx->wqe) @@ -858,7 +858,7 @@ static void verbs_sdma_complete( hfi1_put_txreq(tx); } -static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp) +static int wait_kmem(struct hfi1_ibdev *dev, struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; unsigned long flags; @@ -891,12 +891,12 @@ static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp) */ static int build_verbs_ulp_payload( struct sdma_engine *sde, - struct hfi1_sge_state *ss, + struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx) { - struct hfi1_sge *sg_list = ss->sg_list; - struct hfi1_sge sge = ss->sge; + struct rvt_sge *sg_list = ss->sg_list; + struct rvt_sge sge = ss->sge; u8 num_sge = ss->num_sge; u32 len; int ret = 0; @@ -939,7 +939,7 @@ bail_txadd: /* New API */ static int 
build_verbs_tx_desc( struct sdma_engine *sde, - struct hfi1_sge_state *ss, + struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx, struct ahg_ib_header *ahdr, @@ -1006,13 +1006,13 @@ bail_txadd: return ret; } -int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, +int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { struct hfi1_qp_priv *priv = qp->priv; struct ahg_ib_header *ahdr = priv->s_hdr; u32 hdrwords = qp->s_hdrwords; - struct hfi1_sge_state *ss = qp->s_cur_sge; + struct rvt_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ struct hfi1_ibdev *dev = ps->dev; @@ -1080,7 +1080,7 @@ bail_tx: * If we are now in the error state, return zero to flush the * send work request. */ -static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc) +static int no_bufs_available(struct rvt_qp *qp, struct send_context *sc) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; @@ -1119,7 +1119,7 @@ static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc) return ret; } -struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5) +struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_pportdata *ppd = dd->pport + (qp->port_num - 1); @@ -1131,13 +1131,13 @@ struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5) return dd->vld[vl].sc; } -int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, +int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { struct hfi1_qp_priv *priv = qp->priv; struct ahg_ib_header *ahdr = priv->s_hdr; u32 hdrwords = qp->s_hdrwords; - struct hfi1_sge_state *ss = qp->s_cur_sge; + struct rvt_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; u32 dwords = (len + 3) >> 2; u32 plen = hdrwords + dwords + 2; /* includes pbc */ @@ -1209,7 +1209,7 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh); if (qp->s_rdma_mr) { - hfi1_put_mr(qp->s_rdma_mr); + rvt_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } @@ -1256,7 +1256,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) */ static inline int egress_pkey_check(struct hfi1_pportdata *ppd, struct hfi1_ib_header *hdr, - struct hfi1_qp *qp) + struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; @@ -1319,7 +1319,7 @@ bad: * Return zero if packet is sent or queued OK. * Return non-zero and clear qp->s_flags HFI1_S_BUSY otherwise. 
*/ -int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps) +int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; @@ -1402,8 +1402,8 @@ static int query_device(struct ib_device *ibdev, props->max_cq = hfi1_max_cqs; props->max_ah = hfi1_max_ahs; props->max_cqe = hfi1_max_cqes; - props->max_mr = dev->lk_table.max; - props->max_fmr = dev->lk_table.max; + props->max_mr = dev->rdi.lkey_table.max; + props->max_fmr = dev->rdi.lkey_table.max; props->max_map_per_fmr = 32767; props->max_pd = dev->rdi.dparms.props.max_pd; props->max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; @@ -1657,7 +1657,7 @@ struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) { struct ib_ah_attr attr; struct ib_ah *ah = ERR_PTR(-EINVAL); - struct hfi1_qp *qp0; + struct rvt_qp *qp0; memset(&attr, 0, sizeof(attr)); attr.dlid = dlid; @@ -1772,7 +1772,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) struct hfi1_ibdev *dev = &dd->verbs_dev; struct ib_device *ibdev = &dev->rdi.ibdev; struct hfi1_pportdata *ppd = dd->pport; - unsigned i, lk_tab_size; + unsigned i; int ret; size_t lcpysz = IB_DEVICE_NAME_MAX; u16 descq_cnt; @@ -1796,29 +1796,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; - /* - * The top hfi1_lkey_table_size bits are used to index the - * table. The lower 8 bits can be owned by the user (copied from - * the LKEY). The remaining bits act as a generation number or tag. - */ - spin_lock_init(&dev->lk_table.lock); - dev->lk_table.max = 1 << hfi1_lkey_table_size; - /* ensure generation is at least 4 bits (keys.c) */ - if (hfi1_lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) { - dd_dev_warn(dd, "lkey bits %u too large, reduced to %u\n", - hfi1_lkey_table_size, RVT_MAX_LKEY_TABLE_BITS); - hfi1_lkey_table_size = RVT_MAX_LKEY_TABLE_BITS; - } - lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - dev->lk_table.table = (struct rvt_mregion __rcu **) - vmalloc(lk_tab_size); - if (dev->lk_table.table == NULL) { - ret = -ENOMEM; - goto err_lk; - } - RCU_INIT_POINTER(dev->dma_mr, NULL); - for (i = 0; i < dev->lk_table.max; i++) - RCU_INIT_POINTER(dev->lk_table.table[i], NULL); INIT_LIST_HEAD(&dev->pending_mmaps); spin_lock_init(&dev->pending_lock); seqlock_init(&dev->iowait_lock); @@ -1917,14 +1894,15 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->resize_cq = hfi1_resize_cq; ibdev->poll_cq = hfi1_poll_cq; ibdev->req_notify_cq = hfi1_req_notify_cq; - ibdev->get_dma_mr = hfi1_get_dma_mr; - ibdev->reg_user_mr = hfi1_reg_user_mr; - ibdev->dereg_mr = hfi1_dereg_mr; - ibdev->alloc_mr = hfi1_alloc_mr; - ibdev->alloc_fmr = hfi1_alloc_fmr; - ibdev->map_phys_fmr = hfi1_map_phys_fmr; - ibdev->unmap_fmr = hfi1_unmap_fmr; - ibdev->dealloc_fmr = hfi1_dealloc_fmr; + ibdev->get_dma_mr = NULL; + ibdev->reg_user_mr = NULL; + ibdev->dereg_mr = NULL; + ibdev->alloc_mr = NULL; + ibdev->map_mr_sg = NULL; + ibdev->alloc_fmr = NULL; + ibdev->map_phys_fmr = NULL; + ibdev->unmap_fmr = NULL; + ibdev->dealloc_fmr = NULL; ibdev->attach_mcast = hfi1_multicast_attach; ibdev->detach_mcast = hfi1_multicast_detach; ibdev->process_mad = hfi1_process_mad; @@ -1945,9 +1923,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; dd->verbs_dev.rdi.dparms.props.max_ah = hfi1_max_ahs; dd->verbs_dev.rdi.dparms.props.max_pd = hfi1_max_pds; - dd->verbs_dev.rdi.flags 
= (RVT_FLAG_MR_INIT_DRIVER | - RVT_FLAG_QP_INIT_DRIVER | + dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | RVT_FLAG_CQ_INIT_DRIVER); + dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) @@ -1970,8 +1948,6 @@ err_agents: err_reg: err_verbs_txreq: kmem_cache_destroy(dev->verbs_txreq_cache); - vfree(dev->lk_table.table); -err_lk: hfi1_qp_exit(dev); err_qp_init: dd_dev_err(dd, "cannot register verbs: %d!\n", -ret); @@ -1993,13 +1969,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) dd_dev_err(dd, "txwait list not empty!\n"); if (!list_empty(&dev->memwait)) dd_dev_err(dd, "memwait list not empty!\n"); - if (dev->dma_mr) - dd_dev_err(dd, "DMA MR not NULL!\n"); hfi1_qp_exit(dev); del_timer_sync(&dev->mem_timer); kmem_cache_destroy(dev->verbs_txreq_cache); - vfree(dev->lk_table.table); } void hfi1_cnp_rcv(struct hfi1_packet *packet) @@ -2007,7 +1980,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_ib_header *hdr = packet->hdr; - struct hfi1_qp *qp = packet->qp; + struct rvt_qp *qp = packet->qp; u32 lqpn, rqpn = 0; u16 rlid = 0; u8 sl, sc5, sc4_bit, svc_type; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 4db6136..0782a85 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -222,7 +222,7 @@ struct tx_pio_header { */ struct hfi1_mcast_qp { struct list_head list; - struct hfi1_qp *qp; + struct rvt_qp *qp; }; struct hfi1_mcast { @@ -235,20 +235,6 @@ struct hfi1_mcast { }; /* - * This structure is used by hfi1_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct hfi1_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -/* * This structure is used to contain the head pointer, tail pointer, * and completion queue entries as a single memory allocation so * it can be mmap'ed into user space. @@ -274,238 +260,28 @@ struct hfi1_cq { u8 notify; u8 triggered; struct hfi1_cq_wc *queue; - struct hfi1_mmap_info *ip; -}; - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct hfi1_sge { - struct rvt_mregion *mr; - void *vaddr; /* kernel virtual address of segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -/* Memory region */ -struct hfi1_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - struct rvt_mregion mr; /* must be last */ -}; - -/* - * Send work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->s_max_sge. - */ -struct hfi1_swqe { - union { - struct ib_send_wr wr; /* don't use wr.sg_list */ - struct ib_rdma_wr rdma_wr; - struct ib_atomic_wr atomic_wr; - struct ib_ud_wr ud_wr; - }; - u32 psn; /* first packet sequence number */ - u32 lpsn; /* last packet sequence number */ - u32 ssn; /* send sequence number */ - u32 length; /* total length of data in sg_list */ - struct hfi1_sge sg_list[0]; -}; - -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). 
- */ -struct hfi1_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct hfi1_rwq { - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ - struct hfi1_rwqe wq[0]; -}; - -struct hfi1_rq { - struct hfi1_rwq *wq; - u32 size; /* size of RWQE array */ - u8 max_sge; - /* protect changes in this struct */ - spinlock_t lock ____cacheline_aligned_in_smp; + struct rvt_mmap_info *ip; }; struct hfi1_srq { struct ib_srq ibsrq; - struct hfi1_rq rq; - struct hfi1_mmap_info *ip; + struct rvt_rq rq; + struct rvt_mmap_info *ip; /* send signal when number of RWQEs < limit */ u32 limit; }; -struct hfi1_sge_state { - struct hfi1_sge *sg_list; /* next SGE to be used if any */ - struct hfi1_sge sge; /* progress state for the current SGE */ - u32 total_len; - u8 num_sge; -}; - -/* - * This structure holds the information that the send tasklet needs - * to send a RDMA read response or atomic operation. - */ -struct hfi1_ack_entry { - u8 opcode; - u8 sent; - u32 psn; - u32 lpsn; - union { - struct hfi1_sge rdma_sge; - u64 atomic_data; - }; -}; - /* * hfi1 specific data structures that will be hidden from rvt after the queue * pair is made common */ -struct hfi1_qp; struct hfi1_qp_priv { struct ahg_ib_header *s_hdr; /* next packet header to send */ struct sdma_engine *s_sde; /* current sde */ u8 s_sc; /* SC[0..4] for next packet */ u8 r_adefered; /* number of acks defered */ struct iowait s_iowait; - struct hfi1_qp *owner; -}; - -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. - * - * Common variables are protected by both r_rq.lock and s_lock in that order - * which only happens in modify_qp() or changing the QP 'state'. 
- */ -struct hfi1_qp { - struct ib_qp ibqp; - void *priv; - /* read mostly fields above and below */ - struct ib_ah_attr remote_ah_attr; - struct ib_ah_attr alt_ah_attr; - struct hfi1_qp __rcu *next; /* link list for QPN hash table */ - struct hfi1_swqe *s_wq; /* send work queue */ - struct hfi1_mmap_info *ip; - unsigned long timeout_jiffies; /* computed from timeout */ - - enum ib_mtu path_mtu; - int srate_mbps; /* s_srate (below) converted to Mbit/s */ - u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu */ - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 s_ahgpsn; /* set to the psn in the copy of the header */ - - u8 state; /* QP state */ - u8 allowed_ops; /* high order bits of allowed opcodes */ - u8 qp_access_flags; - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 timeout; /* Timeout for this QP */ - u8 s_srate; - u8 s_mig_state; - u8 port_num; - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_draining; - - /* start of read/write fields */ - atomic_t refcount ____cacheline_aligned_in_smp; - wait_queue_head_t wait; - - - struct hfi1_ack_entry s_ack_queue[HFI1_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; - struct hfi1_sge_state s_rdma_read_sge; - - spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ - unsigned long r_aflags; - u64 r_wr_id; /* ID for current receive WQE */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ - - u8 r_state; /* opcode of last packet received */ - u8 r_flags; - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - - struct list_head rspwait; /* link for waiting to respond */ - - struct hfi1_sge_state r_sge; /* current receive data */ - struct hfi1_rq r_rq; /* receive work queue */ - - spinlock_t s_lock ____cacheline_aligned_in_smp; - struct hfi1_sge_state *s_cur_sge; - u32 s_flags; - struct hfi1_swqe *s_wqe; - struct hfi1_sge_state s_sge; /* current send request data */ - struct rvt_mregion *s_rdma_mr; - u32 s_cur_size; /* size of send packet in bytes */ - u32 s_len; /* total length of s_sge */ - u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ - u32 s_last_psn; /* last response PSN processed */ - u32 s_sending_psn; /* lowest PSN that is being sent */ - u32 s_sending_hpsn; /* highest PSN that is being sent */ - u32 s_psn; /* current packet sequence number */ - u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ - u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_acked; /* last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ - u16 s_rdma_ack_cnt; - s8 s_ahgidx; - u8 s_state; /* opcode of last 
packet sent */ - u8 s_ack_state; /* opcode of packet to ACK */ - u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_nak_state; /* non-zero if NAK is pending */ - u8 s_retry; /* requester retry counter */ - u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ - u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - - struct hfi1_sge_state s_ack_rdma_sge; - struct timer_list s_timer; - - struct hfi1_sge r_sg_list[0] /* verified SGEs */ - ____cacheline_aligned_in_smp; + struct rvt_qp *owner; }; /* @@ -599,27 +375,27 @@ struct hfi1_pkt_state { #define HFI1_PSN_CREDIT 16 /* - * Since struct hfi1_swqe is not a fixed size, we can't simply index into + * Since struct rvt_swqe is not a fixed size, we can't simply index into * struct hfi1_qp.s_wq. This function does the array index computation. */ -static inline struct hfi1_swqe *get_swqe_ptr(struct hfi1_qp *qp, - unsigned n) +static inline struct rvt_swqe *get_swqe_ptr(struct rvt_qp *qp, + unsigned n) { - return (struct hfi1_swqe *)((char *)qp->s_wq + - (sizeof(struct hfi1_swqe) + + return (struct rvt_swqe *)((char *)qp->s_wq + + (sizeof(struct rvt_swqe) + qp->s_max_sge * - sizeof(struct hfi1_sge)) * n); + sizeof(struct rvt_sge)) * n); } /* - * Since struct hfi1_rwqe is not a fixed size, we can't simply index into - * struct hfi1_rwq.wq. This function does the array index computation. + * Since struct rvt_rwqe is not a fixed size, we can't simply index into + * struct rvt_rwq.wq. This function does the array index computation. */ -static inline struct hfi1_rwqe *get_rwqe_ptr(struct hfi1_rq *rq, unsigned n) +static inline struct rvt_rwqe *get_rwqe_ptr(struct rvt_rq *rq, unsigned n) { - return (struct hfi1_rwqe *) + return (struct rvt_rwqe *) ((char *) rq->wq->wq + - (sizeof(struct hfi1_rwqe) + + (sizeof(struct rvt_rwqe) + rq->max_sge * sizeof(struct ib_sge)) * n); } @@ -643,7 +419,7 @@ static inline void inc_opstats( } struct hfi1_ibport { - struct hfi1_qp __rcu *qp[2]; + struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ struct rvt_ah *sm_ah; struct rvt_ah *smi_ah; @@ -706,12 +482,10 @@ struct hfi1_ibdev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; - struct rvt_mregion __rcu *dma_mr; struct hfi1_qp_ibdev *qp_dev; /* QP numbers are shared by all IB ports */ - struct rvt_lkey_table lk_table; /* protect wait lists */ seqlock_t iowait_lock; struct list_head txwait; /* list for wait verbs_txreq */ @@ -760,11 +534,6 @@ struct hfi1_verbs_counters { u32 vl15_dropped; }; -static inline struct hfi1_mr *to_imr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct hfi1_mr, ibmr); -} - static inline struct hfi1_cq *to_icq(struct ib_cq *ibcq) { return container_of(ibcq, struct hfi1_cq, ibcq); @@ -775,9 +544,9 @@ static inline struct hfi1_srq *to_isrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct hfi1_srq, ibsrq); } -static inline struct hfi1_qp *to_iqp(struct ib_qp *ibqp) +static inline struct rvt_qp *to_iqp(struct ib_qp *ibqp) { - return container_of(ibqp, struct hfi1_qp, ibqp); + return container_of(ibqp, struct rvt_qp, ibqp); } static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev) @@ -788,7 +557,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev) return container_of(rdi, struct hfi1_ibdev, rdi); } -static inline struct hfi1_qp *iowait_to_qp(struct iowait *s_iowait) +static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait) { struct hfi1_qp_priv 
*priv; @@ -800,7 +569,7 @@ static inline struct hfi1_qp *iowait_to_qp(struct iowait *s_iowait) * Send if not busy or waiting for I/O and either * a RC response is pending or we can process send work requests. */ -static inline int hfi1_send_ok(struct hfi1_qp *qp) +static inline int hfi1_send_ok(struct rvt_qp *qp) { return !(qp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT_IO)) && (qp->s_hdrwords || (qp->s_flags & HFI1_S_RESP_PENDING) || @@ -890,12 +659,12 @@ int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp); struct verbs_txreq; void hfi1_put_txreq(struct verbs_txreq *tx); -int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps); +int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps); -void hfi1_copy_sge(struct hfi1_sge_state *ss, void *data, u32 length, +void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release); -void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release); +void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release); void hfi1_cnp_rcv(struct hfi1_packet *packet); @@ -907,7 +676,7 @@ void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, struct hfi1_ib_header *hdr, u32 rcv_flags, - struct hfi1_qp *qp); + struct rvt_qp *qp); u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); @@ -915,24 +684,14 @@ struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); void hfi1_rc_rnr_retry(unsigned long arg); -void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr); +void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr); -void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err); +void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err); void hfi1_ud_rcv(struct hfi1_packet *packet); int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey); -int hfi1_alloc_lkey(struct rvt_mregion *mr, int dma_region); - -void hfi1_free_lkey(struct rvt_mregion *mr); - -int hfi1_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct hfi1_sge *isge, struct ib_sge *sge, int acc); - -int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge, - u32 len, u64 vaddr, u32 rkey, int acc); - int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); @@ -970,43 +729,10 @@ int hfi1_req_notify_cq( int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); -struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc); - -struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata); - -int hfi1_dereg_mr(struct ib_mr *ibmr); - -struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_entries); - -struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova); - -int hfi1_unmap_fmr(struct list_head *fmr_list); - -int hfi1_dealloc_fmr(struct ib_fmr *ibfmr); - -static inline void hfi1_get_mr(struct rvt_mregion *mr) -{ - atomic_inc(&mr->refcount); -} - -static inline void hfi1_put_mr(struct rvt_mregion *mr) -{ - if (unlikely(atomic_dec_and_test(&mr->refcount))) - complete(&mr->comp); -} - -static inline void hfi1_put_ss(struct hfi1_sge_state *ss) +static inline void hfi1_put_ss(struct rvt_sge_state *ss) { while (ss->num_sge) { - hfi1_put_mr(ss->sge.mr); + rvt_put_mr(ss->sge.mr); if (--ss->num_sge) ss->sge = *ss->sg_list++; } @@ -1014,38 +740,40 @@ static inline void hfi1_put_ss(struct hfi1_sge_state *ss) void 
hfi1_release_mmap_info(struct kref *ref); -struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, u32 size, - struct ib_ucontext *context, - void *obj); +struct rvt_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, u32 size, + struct ib_ucontext *context, + void *obj); -void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip, +void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct rvt_mmap_info *ip, u32 size, void *obj); int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only); +int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only); + +void hfi1_migrate_qp(struct rvt_qp *qp); int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, - int has_grh, struct hfi1_qp *qp, u32 bth0); + int has_grh, struct rvt_qp *qp, u32 bth0); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, u32 bth0, u32 bth2, int middle); void hfi1_do_send(struct work_struct *work); -void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe, +void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); -void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct hfi1_qp *qp, int is_fecn); +void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn); -int hfi1_make_rc_req(struct hfi1_qp *qp); +int hfi1_make_rc_req(struct rvt_qp *qp); -int hfi1_make_uc_req(struct hfi1_qp *qp); +int hfi1_make_uc_req(struct rvt_qp *qp); -int hfi1_make_ud_req(struct hfi1_qp *qp); +int hfi1_make_ud_req(struct rvt_qp *qp); int hfi1_register_ib_device(struct hfi1_devdata *); @@ -1055,13 +783,13 @@ void hfi1_ib_rcv(struct hfi1_packet *packet); unsigned hfi1_get_npkeys(struct hfi1_devdata *); -int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, +int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); -int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps, +int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); -struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5); +struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5); extern const enum ib_wc_opcode ib_hfi1_wc_opcode[]; @@ -1071,8 +799,6 @@ extern const int ib_hfi1_state_ops[]; extern __be64 ib_hfi1_sys_image_guid; /* in network order */ -extern unsigned int hfi1_lkey_table_size; - extern unsigned int hfi1_max_cqes; extern unsigned int hfi1_max_cqs; diff --git a/drivers/staging/rdma/hfi1/verbs_mcast.c b/drivers/staging/rdma/hfi1/verbs_mcast.c index afc6b4c..49954b9 100644 --- a/drivers/staging/rdma/hfi1/verbs_mcast.c +++ b/drivers/staging/rdma/hfi1/verbs_mcast.c @@ -56,7 +56,7 @@ * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct * @qp: the QP to link */ -static struct hfi1_mcast_qp *mcast_qp_alloc(struct hfi1_qp *qp) +static struct hfi1_mcast_qp *mcast_qp_alloc(struct rvt_qp *qp) { struct hfi1_mcast_qp *mqp; @@ -73,7 +73,7 @@ bail: static void mcast_qp_free(struct hfi1_mcast_qp *mqp) { - struct hfi1_qp *qp = mqp->qp; + struct rvt_qp *qp = mqp->qp; /* Notify hfi1_destroy_qp() if it is waiting. 
*/ if (atomic_dec_and_test(&qp->refcount)) @@ -241,7 +241,7 @@ bail: int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - struct hfi1_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct hfi1_ibdev *dev = to_idev(ibqp->device); struct hfi1_ibport *ibp; struct hfi1_mcast *mcast; @@ -299,7 +299,7 @@ bail: int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - struct hfi1_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = to_iqp(ibqp); struct hfi1_ibdev *dev = to_idev(ibqp->device); struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct hfi1_mcast *mcast = NULL; -- cgit v0.10.2 From 39db3e66fa5f7d489f3eb9b0359d6d7e7bf0cd45 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:33 -0800 Subject: staging/rdma/hfi1: Remove srq from hfi1 SRQ data structure has been moved to rdmavt. Make use of it. Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 5a68455..0f00365 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -1092,7 +1092,7 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, sz = sizeof(*qp); sg_list_sz = 0; if (init_attr->srq) { - struct hfi1_srq *srq = to_isrq(init_attr->srq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); if (srq->rq.max_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 762fca9..3b2f032 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -159,14 +159,14 @@ int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only) unsigned long flags; struct rvt_rq *rq; struct rvt_rwq *wq; - struct hfi1_srq *srq; + struct rvt_srq *srq; struct rvt_rwqe *wqe; void (*handler)(struct ib_event *, void *); u32 tail; int ret; if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); + srq = ibsrq_to_rvtsrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; rq = &srq->rq; } else { diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/staging/rdma/hfi1/srq.c index 932bd96..78f190a 100644 --- a/drivers/staging/rdma/hfi1/srq.c +++ b/drivers/staging/rdma/hfi1/srq.c @@ -65,7 +65,7 @@ int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - struct hfi1_srq *srq = to_isrq(ibsrq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_rwq *wq; unsigned long flags; int ret; @@ -120,7 +120,7 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd, struct ib_udata *udata) { struct hfi1_ibdev *dev = to_idev(ibpd->device); - struct hfi1_srq *srq; + struct rvt_srq *srq; u32 sz; struct ib_srq *ret; @@ -229,7 +229,7 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { - struct hfi1_srq *srq = to_isrq(ibsrq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_rwq *wq; int ret = 0; @@ -367,7 +367,7 @@ bail: int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { - struct hfi1_srq *srq = to_isrq(ibsrq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); attr->max_wr = srq->rq.size - 1; attr->max_sge = srq->rq.max_sge; @@ -381,7 +381,7 @@ int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) */ int hfi1_destroy_srq(struct ib_srq *ibsrq) { - struct hfi1_srq *srq = to_isrq(ibsrq); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct hfi1_ibdev *dev = to_idev(ibsrq->device); spin_lock(&dev->n_srqs_lock); 
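The conversions above are mechanical: every to_isrq() call becomes ibsrq_to_rvtsrq() and the local variable type changes from struct hfi1_srq to struct rvt_srq. A minimal sketch of what that helper amounts to, assuming the rvt_srq layout that the verbs.h hunk below drops from hfi1 (the structure moves to rdmavt essentially unchanged); this is shown only to illustrate the conversion and is not part of the patch:

        /*
         * Illustrative sketch only.  Both the old to_isrq() and the rdmavt
         * ibsrq_to_rvtsrq() reduce to a container_of() on the embedded
         * struct ib_srq; the rvt_srq layout below is assumed to match what
         * rdmavt provides.
         */
        struct rvt_srq {
                struct ib_srq ibsrq;
                struct rvt_rq rq;
                struct rvt_mmap_info *ip;
                u32 limit;      /* send signal when number of RWQEs < limit */
        };

        static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
        {
                return container_of(ibsrq, struct rvt_srq, ibsrq);
        }
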
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 0782a85..97df555 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -263,14 +263,6 @@ struct hfi1_cq { struct rvt_mmap_info *ip; }; -struct hfi1_srq { - struct ib_srq ibsrq; - struct rvt_rq rq; - struct rvt_mmap_info *ip; - /* send signal when number of RWQEs < limit */ - u32 limit; -}; - /* * hfi1 specific data structures that will be hidden from rvt after the queue * pair is made common @@ -539,11 +531,6 @@ static inline struct hfi1_cq *to_icq(struct ib_cq *ibcq) return container_of(ibcq, struct hfi1_cq, ibcq); } -static inline struct hfi1_srq *to_isrq(struct ib_srq *ibsrq) -{ - return container_of(ibsrq, struct hfi1_srq, ibsrq); -} - static inline struct rvt_qp *to_iqp(struct ib_qp *ibqp) { return container_of(ibqp, struct rvt_qp, ibqp); -- cgit v0.10.2 From 4eb068824abb0fb335f87f268681a55a147a176f Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:39 -0800 Subject: staging/rdma/hfi1: Remove ibport and use rdmavt version Remove most of the ibport members from hfi1 and use the rdmavt version. Also register the port with rdmavt. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index da2718f..93e152d 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -3933,8 +3933,8 @@ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \ void *context, int vl, int mode, u64 data) \ { \ struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \ - return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr, \ - ppd->ibport_data.cntr, vl, \ + return read_write_cpu(ppd->dd, &ppd->ibport_data.rvp.z_ ##cntr, \ + ppd->ibport_data.rvp.cntr, vl, \ mode, data); \ } @@ -3951,7 +3951,7 @@ static u64 access_ibp_##cntr(const struct cntr_entry *entry, \ if (vl != CNTR_INVALID_VL) \ return 0; \ \ - return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr, \ + return read_write_sw(ppd->dd, &ppd->ibport_data.rvp.n_ ##cntr, \ mode, data); \ } @@ -9239,14 +9239,14 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd) ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { - ppd->ibport_data.rc_acks = NULL; - ppd->ibport_data.rc_qacks = NULL; - ppd->ibport_data.rc_acks = alloc_percpu(u64); - ppd->ibport_data.rc_qacks = alloc_percpu(u64); - ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64); - if ((ppd->ibport_data.rc_acks == NULL) || - (ppd->ibport_data.rc_delayed_comp == NULL) || - (ppd->ibport_data.rc_qacks == NULL)) + ppd->ibport_data.rvp.rc_acks = NULL; + ppd->ibport_data.rvp.rc_qacks = NULL; + ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64); + ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64); + ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64); + if (!ppd->ibport_data.rvp.rc_acks || + !ppd->ibport_data.rvp.rc_delayed_comp || + !ppd->ibport_data.rvp.rc_qacks) return -ENOMEM; } @@ -11318,14 +11318,14 @@ static void free_cntrs(struct hfi1_devdata *dd) for (i = 0; i < dd->num_pports; i++, ppd++) { kfree(ppd->cntrs); kfree(ppd->scntrs); - free_percpu(ppd->ibport_data.rc_acks); - free_percpu(ppd->ibport_data.rc_qacks); - free_percpu(ppd->ibport_data.rc_delayed_comp); + free_percpu(ppd->ibport_data.rvp.rc_acks); + free_percpu(ppd->ibport_data.rvp.rc_qacks); + free_percpu(ppd->ibport_data.rvp.rc_delayed_comp); ppd->cntrs 
= NULL; ppd->scntrs = NULL; - ppd->ibport_data.rc_acks = NULL; - ppd->ibport_data.rc_qacks = NULL; - ppd->ibport_data.rc_delayed_comp = NULL; + ppd->ibport_data.rvp.rc_acks = NULL; + ppd->ibport_data.rvp.rc_qacks = NULL; + ppd->ibport_data.rvp.rc_delayed_comp = NULL; } kfree(dd->portcntrnames); dd->portcntrnames = NULL; diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index eaed692..da55e39 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -337,7 +337,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* Check for valid receive state. */ if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; } switch (qp->ibqp.qp_type) { diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index d52dbda..e5f3451 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1855,10 +1855,10 @@ static inline void hfi1_reset_cpu_counters(struct hfi1_devdata *dd) ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { - ppd->ibport_data.z_rc_acks = - get_all_cpu_total(ppd->ibport_data.rc_acks); - ppd->ibport_data.z_rc_qacks = - get_all_cpu_total(ppd->ibport_data.rc_qacks); + ppd->ibport_data.rvp.z_rc_acks = + get_all_cpu_total(ppd->ibport_data.rvp.rc_acks); + ppd->ibport_data.rvp.z_rc_qacks = + get_all_cpu_total(ppd->ibport_data.rvp.rc_qacks); } } diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 1190f8d..6daf277 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -91,7 +91,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) int pkey_idx; u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp; - agent = ibp->send_agent; + agent = ibp->rvp.send_agent; if (!agent) return; @@ -100,7 +100,8 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) return; /* o14-2 */ - if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout)) + if (ibp->rvp.trap_timeout && time_before(jiffies, + ibp->rvp.trap_timeout)) return; pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY); @@ -121,18 +122,18 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; smp->class_version = OPA_SMI_CLASS_VERSION; smp->method = IB_MGMT_METHOD_TRAP; - ibp->tid++; - smp->tid = cpu_to_be64(ibp->tid); + ibp->rvp.tid++; + smp->tid = cpu_to_be64(ibp->rvp.tid); smp->attr_id = IB_SMP_ATTR_NOTICE; /* o14-1: smp->mkey = 0; */ memcpy(smp->route.lid.data, data, len); - spin_lock_irqsave(&ibp->lock, flags); + spin_lock_irqsave(&ibp->rvp.lock, flags); if (!ibp->sm_ah) { - if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { + if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; - ah = hfi1_create_qp0_ah(ibp, ibp->sm_lid); + ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid); if (IS_ERR(ah)) ret = PTR_ERR(ah); else { @@ -146,17 +147,17 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) send_buf->ah = &ibp->sm_ah->ibah; ret = 0; } - spin_unlock_irqrestore(&ibp->lock, flags); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); if (!ret) ret = ib_post_send_mad(send_buf, NULL); if (!ret) { /* 4.096 usec. 
*/ - timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000; - ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout); + timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000; + ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout); } else { ib_free_send_mad(send_buf); - ibp->trap_timeout = 0; + ibp->rvp.trap_timeout = 0; } } @@ -174,10 +175,10 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, memset(&data, 0, sizeof(data)); if (trap_num == OPA_TRAP_BAD_P_KEY) - ibp->pkey_violations++; + ibp->rvp.pkey_violations++; else - ibp->qkey_violations++; - ibp->n_pkt_drops++; + ibp->rvp.qkey_violations++; + ibp->rvp.n_pkt_drops++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; @@ -245,7 +246,7 @@ void hfi1_cap_mask_chg(struct hfi1_ibport *ibp) data.trap_num = OPA_TRAP_CHANGE_CAPABILITY; data.issuer_lid = cpu_to_be32(lid); data.ntc_144.lid = data.issuer_lid; - data.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags); + data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); send_trap(ibp, &data, sizeof(data)); } @@ -407,37 +408,38 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, int ret = 0; /* Is the mkey in the process of expiring? */ - if (ibp->mkey_lease_timeout && - time_after_eq(jiffies, ibp->mkey_lease_timeout)) { + if (ibp->rvp.mkey_lease_timeout && + time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) { /* Clear timeout and mkey protection field. */ - ibp->mkey_lease_timeout = 0; - ibp->mkeyprot = 0; + ibp->rvp.mkey_lease_timeout = 0; + ibp->rvp.mkeyprot = 0; } - if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 || - ibp->mkey == mkey) + if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->rvp.mkey == 0 || + ibp->rvp.mkey == mkey) valid_mkey = 1; /* Unset lease timeout on any valid Get/Set/TrapRepress */ - if (valid_mkey && ibp->mkey_lease_timeout && + if (valid_mkey && ibp->rvp.mkey_lease_timeout && (mad->method == IB_MGMT_METHOD_GET || mad->method == IB_MGMT_METHOD_SET || mad->method == IB_MGMT_METHOD_TRAP_REPRESS)) - ibp->mkey_lease_timeout = 0; + ibp->rvp.mkey_lease_timeout = 0; if (!valid_mkey) { switch (mad->method) { case IB_MGMT_METHOD_GET: /* Bad mkey not a violation below level 2 */ - if (ibp->mkeyprot < 2) + if (ibp->rvp.mkeyprot < 2) break; case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_TRAP_REPRESS: - if (ibp->mkey_violations != 0xFFFF) - ++ibp->mkey_violations; - if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) - ibp->mkey_lease_timeout = jiffies + - ibp->mkey_lease_period * HZ; + if (ibp->rvp.mkey_violations != 0xFFFF) + ++ibp->rvp.mkey_violations; + if (!ibp->rvp.mkey_lease_timeout && + ibp->rvp.mkey_lease_period) + ibp->rvp.mkey_lease_timeout = jiffies + + ibp->rvp.mkey_lease_period * HZ; /* Generate a trap notice. */ bad_mkey(ibp, mad, mkey, dr_slid, return_path, hop_cnt); @@ -548,14 +550,14 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* Only return the mkey if the protection field allows it. 
*/ if (!(smp->method == IB_MGMT_METHOD_GET && - ibp->mkey != smp->mkey && - ibp->mkeyprot == 1)) - pi->mkey = ibp->mkey; - - pi->subnet_prefix = ibp->gid_prefix; - pi->sm_lid = cpu_to_be32(ibp->sm_lid); - pi->ib_cap_mask = cpu_to_be32(ibp->port_cap_flags); - pi->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period); + ibp->rvp.mkey != smp->mkey && + ibp->rvp.mkeyprot == 1)) + pi->mkey = ibp->rvp.mkey; + + pi->subnet_prefix = ibp->rvp.gid_prefix; + pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid); + pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); + pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period); pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp); pi->sa_qp = cpu_to_be32(ppd->sa_qp); @@ -599,7 +601,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->port_states.portphysstate_portstate = (hfi1_ibphys_portstate(ppd) << 4) | state; - pi->mkeyprotect_lmc = (ibp->mkeyprot << 6) | ppd->lmc; + pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc; memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu)); for (i = 0; i < ppd->vls_supported; i++) { @@ -612,7 +614,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* don't forget VL 15 */ mtu = mtu_to_enum(dd->vld[15].mtu, 2048); pi->neigh_mtu.pvlx_to_mtu[15/2] |= mtu; - pi->smsl = ibp->sm_sl & OPA_PI_MASK_SMSL; + pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL; pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS); pi->partenforce_filterraw |= (ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON); @@ -620,17 +622,17 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN; if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT) pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT; - pi->mkey_violations = cpu_to_be16(ibp->mkey_violations); + pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations); /* P_KeyViolations are counted by hardware. */ - pi->pkey_violations = cpu_to_be16(ibp->pkey_violations); - pi->qkey_violations = cpu_to_be16(ibp->qkey_violations); + pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations); + pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations); pi->vl.cap = ppd->vls_supported; - pi->vl.high_limit = cpu_to_be16(ibp->vl_high_limit); + pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit); pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP); pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP); - pi->clientrereg_subnettimeout = ibp->subnet_timeout; + pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout; pi->port_link_mode = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 | OPA_PORT_LINK_MODE_OPA << 5 | @@ -1091,9 +1093,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ls_old = driver_lstate(ppd); - ibp->mkey = pi->mkey; - ibp->gid_prefix = pi->subnet_prefix; - ibp->mkey_lease_period = be16_to_cpu(pi->mkey_lease_period); + ibp->rvp.mkey = pi->mkey; + ibp->rvp.gid_prefix = pi->subnet_prefix; + ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period); /* Must be a valid unicast LID address. 
*/ if ((lid == 0 && ls_old > IB_PORT_INIT) || @@ -1133,20 +1135,20 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid); - } else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) { + } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) { pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid); - spin_lock_irqsave(&ibp->lock, flags); + spin_lock_irqsave(&ibp->rvp.lock, flags); if (ibp->sm_ah) { - if (smlid != ibp->sm_lid) + if (smlid != ibp->rvp.sm_lid) ibp->sm_ah->attr.dlid = smlid; - if (msl != ibp->sm_sl) + if (msl != ibp->rvp.sm_sl) ibp->sm_ah->attr.sl = msl; } - spin_unlock_irqrestore(&ibp->lock, flags); - if (smlid != ibp->sm_lid) - ibp->sm_lid = smlid; - if (msl != ibp->sm_sl) - ibp->sm_sl = msl; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + if (smlid != ibp->rvp.sm_lid) + ibp->rvp.sm_lid = smlid; + if (msl != ibp->rvp.sm_sl) + ibp->rvp.sm_sl = msl; event.event = IB_EVENT_SM_CHANGE; ib_dispatch_event(&event); } @@ -1198,10 +1200,11 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, smp->status |= IB_SMP_INVALID_FIELD; } - ibp->mkeyprot = (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6; - ibp->vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF; + ibp->rvp.mkeyprot = + (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6; + ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF; (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT, - ibp->vl_high_limit); + ibp->rvp.vl_high_limit); if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) || ppd->vls_supported > ARRAY_SIZE(dd->vld)) { @@ -1260,15 +1263,15 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, } if (pi->mkey_violations == 0) - ibp->mkey_violations = 0; + ibp->rvp.mkey_violations = 0; if (pi->pkey_violations == 0) - ibp->pkey_violations = 0; + ibp->rvp.pkey_violations = 0; if (pi->qkey_violations == 0) - ibp->qkey_violations = 0; + ibp->rvp.qkey_violations = 0; - ibp->subnet_timeout = + ibp->rvp.subnet_timeout = pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT; crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode); @@ -3532,9 +3535,9 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, resp_len); break; case IB_SMP_ATTR_SM_INFO: - if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) + if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - if (ibp->port_cap_flags & IB_PORT_SM) + if (ibp->rvp.port_cap_flags & IB_PORT_SM) return IB_MAD_RESULT_SUCCESS; /* FALLTHROUGH */ default: @@ -3602,9 +3605,9 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, resp_len); break; case IB_SMP_ATTR_SM_INFO: - if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) + if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - if (ibp->port_cap_flags & IB_PORT_SM) + if (ibp->rvp.port_cap_flags & IB_PORT_SM) return IB_MAD_RESULT_SUCCESS; /* FALLTHROUGH */ default: @@ -4180,7 +4183,7 @@ int hfi1_create_agents(struct hfi1_ibdev *dev) goto err; } - ibp->send_agent = agent; + ibp->rvp.send_agent = agent; } return 0; @@ -4188,9 +4191,9 @@ int hfi1_create_agents(struct hfi1_ibdev *dev) err: for (p = 0; p < dd->num_pports; p++) { ibp = &dd->pport[p].ibport_data; - if (ibp->send_agent) { - agent = ibp->send_agent; - ibp->send_agent = NULL; + if (ibp->rvp.send_agent) { 
+ agent = ibp->rvp.send_agent; + ibp->rvp.send_agent = NULL; ib_unregister_mad_agent(agent); } } @@ -4207,9 +4210,9 @@ void hfi1_free_agents(struct hfi1_ibdev *dev) for (p = 0; p < dd->num_pports; p++) { ibp = &dd->pport[p].ibport_data; - if (ibp->send_agent) { - agent = ibp->send_agent; - ibp->send_agent = NULL; + if (ibp->rvp.send_agent) { + agent = ibp->rvp.send_agent; + ibp->rvp.send_agent = NULL; ib_unregister_mad_agent(agent); } if (ibp->sm_ah) { diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 0f00365..a1dfb71 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -238,7 +238,7 @@ static void insert_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags); if (qp->ibqp.qp_num <= 1) { - rcu_assign_pointer(ibp->qp[qp->ibqp.qp_num], qp); + rcu_assign_pointer(ibp->rvp.qp[qp->ibqp.qp_num], qp); } else { u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num); @@ -263,12 +263,13 @@ static void remove_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags); - if (rcu_dereference_protected(ibp->qp[0], + if (rcu_dereference_protected(ibp->rvp.qp[0], + lockdep_is_held( + &dev->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); + } else if (rcu_dereference_protected(ibp->rvp.qp[1], lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->qp[0], NULL); - } else if (rcu_dereference_protected(ibp->qp[1], - lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->qp[1], NULL); + RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } else { struct rvt_qp *q; struct rvt_qp __rcu **qpp; @@ -317,9 +318,9 @@ static unsigned free_all_qps(struct hfi1_devdata *dd) if (!hfi1_mcast_tree_empty(ibp)) qp_inuse++; rcu_read_lock(); - if (rcu_dereference(ibp->qp[0])) + if (rcu_dereference(ibp->rvp.qp[0])) qp_inuse++; - if (rcu_dereference(ibp->qp[1])) + if (rcu_dereference(ibp->rvp.qp[1])) qp_inuse++; rcu_read_unlock(); } @@ -1467,7 +1468,7 @@ static int iowait_sleep( struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - ibp->n_dmawait++; + ibp->rvp.n_dmawait++; qp->s_flags |= HFI1_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC); @@ -1636,9 +1637,9 @@ int qp_iter_next(struct qp_iter *iter) ibp = &ppd->ibport_data; if (!(n & 1)) - qp = rcu_dereference(ibp->qp[0]); + qp = rcu_dereference(ibp->rvp.qp[0]); else - qp = rcu_dereference(ibp->qp[1]); + qp = rcu_dereference(ibp->rvp.qp[1]); } else { qp = rcu_dereference( dev->qp_dev->qp_table[ diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 3dd31e9..8e66562 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -104,7 +104,7 @@ static inline struct rvt_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, struct rvt_qp *qp = NULL; if (unlikely(qpn <= 1)) { - qp = rcu_dereference(ibp->qp[qpn]); + qp = rcu_dereference(ibp->rvp.qp[qpn]); } else { struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; u32 n = qpn_hash(dev->qp_dev, qpn); diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 4b8518a..d7334f4 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -772,7 +772,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, return; queue_ack: - this_cpu_inc(*ibp->rc_qacks); + this_cpu_inc(*ibp->rvp.rc_qacks); spin_lock_irqsave(&qp->s_lock, flags); qp->s_flags |= HFI1_S_ACK_PENDING | 
HFI1_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; @@ -900,9 +900,9 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) ibp = to_iport(qp->ibqp.device, qp->port_num); if (wqe->wr.opcode == IB_WR_RDMA_READ) - ibp->n_rc_resends++; + ibp->rvp.n_rc_resends++; else - ibp->n_rc_resends += delta_psn(qp->s_psn, psn); + ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn); qp->s_flags &= ~(HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR | HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_PSN | @@ -925,7 +925,7 @@ static void rc_timeout(unsigned long arg) spin_lock(&qp->s_lock); if (qp->s_flags & HFI1_S_TIMER) { ibp = to_iport(qp->ibqp.device, qp->port_num); - ibp->n_rc_timeouts++; + ibp->rvp.n_rc_timeouts++; qp->s_flags &= ~HFI1_S_TIMER; del_timer(&qp->s_timer); trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1); @@ -1104,7 +1104,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - this_cpu_inc(*ibp->rc_delayed_comp); + this_cpu_inc(*ibp->rvp.rc_delayed_comp); /* * If send progress not running attempt to progress * SDMA queue. @@ -1263,7 +1263,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, switch (aeth >> 29) { case 0: /* ACK */ - this_cpu_inc(*ibp->rc_acks); + this_cpu_inc(*ibp->rvp.rc_acks); if (qp->s_acked != qp->s_tail) { /* * We are expecting more ACKs so @@ -1292,7 +1292,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, goto bail; case 1: /* RNR NAK */ - ibp->n_rnr_naks++; + ibp->rvp.n_rnr_naks++; if (qp->s_acked == qp->s_tail) goto bail; if (qp->s_flags & HFI1_S_WAIT_RNR) @@ -1307,7 +1307,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, /* The last valid PSN is the previous PSN. */ update_last_psn(qp, psn - 1); - ibp->n_rc_resends += delta_psn(qp->s_psn, psn); + ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn); reset_psn(qp, psn); @@ -1328,7 +1328,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ - ibp->n_seq_naks++; + ibp->rvp.n_seq_naks++; /* * Back up to the responder's expected PSN. * Note that we might get a NAK in the middle of an @@ -1341,17 +1341,17 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, case 1: /* Invalid Request */ status = IB_WC_REM_INV_REQ_ERR; - ibp->n_other_naks++; + ibp->rvp.n_other_naks++; goto class_b; case 2: /* Remote Access Error */ status = IB_WC_REM_ACCESS_ERR; - ibp->n_other_naks++; + ibp->rvp.n_other_naks++; goto class_b; case 3: /* Remote Operation Error */ status = IB_WC_REM_OP_ERR; - ibp->n_other_naks++; + ibp->rvp.n_other_naks++; class_b: if (qp->s_last == qp->s_acked) { hfi1_send_complete(qp, wqe, status); @@ -1402,7 +1402,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, wqe = do_rc_completion(qp, wqe, ibp); } - ibp->n_rdma_seq++; + ibp->rvp.n_rdma_seq++; qp->r_flags |= HFI1_R_RDMAR_SEQ; restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { @@ -1665,7 +1665,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, * Don't queue the NAK if we already sent one. */ if (!qp->r_nak_state) { - ibp->n_rc_seqnak++; + ibp->rvp.n_rc_seqnak++; qp->r_nak_state = IB_NAK_PSN_ERROR; /* Use the expected PSN. 
*/ qp->r_ack_psn = qp->r_psn; @@ -1697,7 +1697,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, */ e = NULL; old_req = 1; - ibp->n_rc_dupreq++; + ibp->rvp.n_rc_dupreq++; spin_lock_irqsave(&qp->s_lock, flags); @@ -2433,7 +2433,7 @@ void hfi1_rc_hdrerr( if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff = delta_psn(psn, qp->r_psn); if (!qp->r_nak_state && diff >= 0) { - ibp->n_rc_seqnak++; + ibp->rvp.n_rc_seqnak++; qp->r_nak_state = IB_NAK_PSN_ERROR; /* Use the expected PSN. */ qp->r_ack_psn = qp->r_psn; diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 3b2f032..98a4798 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -279,7 +279,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH)) goto err; guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid)) + if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + guid)) goto err; if (!gid_ok(&hdr->u.l.grh.sgid, qp->alt_ah_attr.grh.dgid.global.subnet_prefix, @@ -312,7 +313,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, goto err; guid = get_sguid(ibp, qp->remote_ah_attr.grh.sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid)) + if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + guid)) goto err; if (!gid_ok(&hdr->u.l.grh.sgid, qp->remote_ah_attr.grh.dgid.global.subnet_prefix, @@ -413,7 +415,7 @@ again: if (!qp || !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) || qp->ibqp.qp_type != sqp->ibqp.qp_type) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; /* * For RC, the requester would timeout and retry so * shortcut the timeouts and just signal too many retries. @@ -569,7 +571,7 @@ again: send_comp: spin_lock_irqsave(&sqp->s_lock, flags); - ibp->n_loop_pkts++; + ibp->rvp.n_loop_pkts++; flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; hfi1_send_complete(sqp, wqe, send_status); @@ -579,7 +581,7 @@ rnr_nak: /* Handle RNR NAK */ if (qp->ibqp.qp_type == IB_QPT_UC) goto send_comp; - ibp->n_rnr_naks++; + ibp->rvp.n_rnr_naks++; /* * Note: we don't need the s_lock held since the BUSY flag * makes this single threaded. @@ -665,7 +667,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, hdr->next_hdr = IB_GRH_NEXT_HDR; hdr->hop_limit = grh->hop_limit; /* The SGID is 32-bit aligned. */ - hdr->sgid.global.subnet_prefix = ibp->gid_prefix; + hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; hdr->sgid.global.interface_id = grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ? 
ibp->guids[grh->sgid_index - 1] : diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 6686331..cac3724 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -579,7 +579,7 @@ rewind: set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; drop: - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; return; op_err: diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index d54d56d..e058fd2 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -82,7 +82,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); if (!qp) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; rcu_read_unlock(); return; } @@ -94,7 +94,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (dqptype != sqptype || !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) { - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; goto drop; } @@ -173,14 +173,14 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } if (!ret) { if (qp->ibqp.qp_num == 0) - ibp->n_vl15_dropped++; + ibp->rvp.n_vl15_dropped++; goto bail_unlock; } } /* Silently drop packets which are too big. */ if (unlikely(wc.byte_len > qp->r_len)) { qp->r_flags |= HFI1_R_REUSE_SGE; - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; goto bail_unlock; } @@ -249,7 +249,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) /* Signal completion event if the solicited bit is set. */ hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); - ibp->n_loop_pkts++; + ibp->rvp.n_loop_pkts++; bail_unlock: spin_unlock_irqrestore(&qp->r_lock, flags); drop: @@ -608,7 +608,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, case IB_MGMT_METHOD_TRAP: case IB_MGMT_METHOD_GET_RESP: case IB_MGMT_METHOD_REPORT_RESP: - if (ibp->port_cap_flags & IB_PORT_SM) + if (ibp->rvp.port_cap_flags & IB_PORT_SM) return 0; if (pkey == FULL_MGMT_P_KEY) { smp->status |= IB_SMP_UNSUP_METHOD; @@ -824,7 +824,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } if (!ret) { if (qp->ibqp.qp_num == 0) - ibp->n_vl15_dropped++; + ibp->rvp.n_vl15_dropped++; return; } } @@ -884,5 +884,5 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) return; drop: - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; } diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 10d6547..1c5e477 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -593,7 +593,7 @@ static inline int qp_ok(int opcode, struct hfi1_packet *packet) return 1; dropit: ibp = &packet->rcd->ppd->ibport_data; - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; return 0; } @@ -683,7 +683,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) return; drop: - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; } /* @@ -1465,17 +1465,17 @@ static int query_port(struct ib_device *ibdev, u8 port, memset(props, 0, sizeof(*props)); props->lid = lid ? 
lid : 0; props->lmc = ppd->lmc; - props->sm_lid = ibp->sm_lid; - props->sm_sl = ibp->sm_sl; + props->sm_lid = ibp->rvp.sm_lid; + props->sm_sl = ibp->rvp.sm_sl; /* OPA logical states match IB logical states */ props->state = driver_lstate(ppd); props->phys_state = hfi1_ibphys_portstate(ppd); - props->port_cap_flags = ibp->port_cap_flags; + props->port_cap_flags = ibp->rvp.port_cap_flags; props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->max_msg_sz = 0x80000000; props->pkey_tbl_len = hfi1_get_npkeys(dd); - props->bad_pkey_cntr = ibp->pkey_violations; - props->qkey_viol_cntr = ibp->qkey_violations; + props->bad_pkey_cntr = ibp->rvp.pkey_violations; + props->qkey_viol_cntr = ibp->rvp.qkey_violations; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); @@ -1494,7 +1494,7 @@ static int query_port(struct ib_device *ibdev, u8 port, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_2048); - props->subnet_timeout = ibp->subnet_timeout; + props->subnet_timeout = ibp->rvp.subnet_timeout; return 0; } @@ -1565,8 +1565,8 @@ static int modify_port(struct ib_device *ibdev, u8 port, struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); int ret = 0; - ibp->port_cap_flags |= props->set_port_cap_mask; - ibp->port_cap_flags &= ~props->clr_port_cap_mask; + ibp->rvp.port_cap_flags |= props->set_port_cap_mask; + ibp->rvp.port_cap_flags &= ~props->clr_port_cap_mask; if (props->set_port_cap_mask || props->clr_port_cap_mask) hfi1_cap_mask_chg(ibp); if (port_modify_mask & IB_PORT_SHUTDOWN) { @@ -1575,7 +1575,7 @@ static int modify_port(struct ib_device *ibdev, u8 port, ret = set_link_state(ppd, HLS_DN_DOWNDEF); } if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) - ibp->qkey_violations = 0; + ibp->rvp.qkey_violations = 0; return ret; } @@ -1591,7 +1591,7 @@ static int query_gid(struct ib_device *ibdev, u8 port, struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - gid->global.subnet_prefix = ibp->gid_prefix; + gid->global.subnet_prefix = ibp->rvp.gid_prefix; if (index == 0) gid->global.interface_id = cpu_to_be64(ppd->guid); else if (index < HFI1_GUIDS_PER_PORT) @@ -1663,7 +1663,7 @@ struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) attr.dlid = dlid; attr.port_num = ppd_from_ibp(ibp)->port; rcu_read_lock(); - qp0 = rcu_dereference(ibp->qp[0]); + qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) ah = ib_create_ah(qp0->ibqp.pd, &attr); rcu_read_unlock(); @@ -1738,21 +1738,21 @@ static void init_ibport(struct hfi1_pportdata *ppd) ibp->sc_to_sl[i] = i; } - spin_lock_init(&ibp->lock); + spin_lock_init(&ibp->rvp.lock); /* Set the prefix to the default value (see ch. 
4.1.1) */ - ibp->gid_prefix = IB_DEFAULT_GID_PREFIX; - ibp->sm_lid = 0; + ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; + ibp->rvp.sm_lid = 0; /* Below should only set bits defined in OPA PortInfo.CapabilityMask */ - ibp->port_cap_flags = IB_PORT_AUTO_MIGR_SUP | + ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | IB_PORT_CAP_MASK_NOTICE_SUP; - ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; - ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; - ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; - ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; - ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; - - RCU_INIT_POINTER(ibp->qp[0], NULL); - RCU_INIT_POINTER(ibp->qp[1], NULL); + ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; + ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; + ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; + ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; + ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; + + RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); + RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } static void verbs_txreq_kmem_cache_ctor(void *obj) @@ -1926,6 +1926,15 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | RVT_FLAG_CQ_INIT_DRIVER); dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; + dd->verbs_dev.rdi.dparms.nports = dd->num_pports; + dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); + + ppd = dd->pport; + for (i = 0; i < dd->num_pports; i++, ppd++) + rvt_init_port(&dd->verbs_dev.rdi, + &ppd->ibport_data.rvp, + i, + ppd->pkeys); ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) @@ -2003,7 +2012,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) svc_type = IB_CC_SVCTYPE_UD; break; default: - ibp->n_pkt_drops++; + ibp->rvp.n_pkt_drops++; return; } diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 97df555..6a7ee46 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -412,62 +412,17 @@ static inline void inc_opstats( struct hfi1_ibport { struct rvt_qp __rcu *qp[2]; - struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct rvt_ibport rvp; struct rvt_ah *sm_ah; struct rvt_ah *smi_ah; - struct rb_root mcast_tree; - spinlock_t lock; /* protect changes in this struct */ - - /* non-zero when timer is set */ - unsigned long mkey_lease_timeout; - unsigned long trap_timeout; - __be64 gid_prefix; /* in network order */ - __be64 mkey; + __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */ - u64 tid; /* TID for traps */ - u64 n_rc_resends; - u64 n_seq_naks; - u64 n_rdma_seq; - u64 n_rnr_naks; - u64 n_other_naks; - u64 n_loop_pkts; - u64 n_pkt_drops; - u64 n_vl15_dropped; - u64 n_rc_timeouts; - u64 n_dmawait; - u64 n_unaligned; - u64 n_rc_dupreq; - u64 n_rc_seqnak; - - /* Hot-path per CPU counters to avoid cacheline trading to update */ - u64 z_rc_acks; - u64 z_rc_qacks; - u64 z_rc_delayed_comp; - u64 __percpu *rc_acks; - u64 __percpu *rc_qacks; - u64 __percpu *rc_delayed_comp; - - u32 port_cap_flags; - u32 pma_sample_start; - u32 pma_sample_interval; - __be16 pma_counter_select[5]; - u16 pma_tag; - u16 pkey_violations; - u16 qkey_violations; - u16 mkey_violations; - u16 mkey_lease_period; - u16 sm_lid; - u16 repress_traps; - u8 sm_sl; - u8 mkeyprot; - u8 subnet_timeout; - u8 vl_high_limit; + /* the first 16 entries are sl_to_vl for !OPA */ u8 sl_to_sc[32]; u8 sc_to_sl[32]; }; - struct hfi1_qp_ibdev; struct hfi1_ibdev { struct rvt_dev_info rdi; /* Must be first */ 
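The hfi1_ibport hunk above shrinks the driver-private port structure to the fields rdmavt cannot own and embeds everything else in a struct rvt_ibport member named rvp, which is why the per-port counters and SM state throughout this patch gain an rvp. step. A minimal sketch of the access change, using the post-patch layout shown above (hfi1_count_pkt_drop() is a hypothetical helper, used only for illustration):

        /*
         * Sketch only: the post-patch hfi1_ibport as shown in the hunk
         * above; the full struct rvt_ibport definition lives in rdmavt
         * and is not reproduced here.
         */
        struct hfi1_ibport {
                struct rvt_qp __rcu *qp[2];
                struct rvt_ibport rvp;  /* common port state and counters */
                struct rvt_ah *sm_ah;
                struct rvt_ah *smi_ah;
                __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */
                /* the first 16 entries are sl_to_vl for !OPA */
                u8 sl_to_sc[32];
                u8 sc_to_sl[32];
        };

        /* hypothetical helper: counters now live one level down, under rvp */
        static inline void hfi1_count_pkt_drop(struct hfi1_ibport *ibp)
        {
                ibp->rvp.n_pkt_drops++;         /* was: ibp->n_pkt_drops++ */
        }
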
diff --git a/drivers/staging/rdma/hfi1/verbs_mcast.c b/drivers/staging/rdma/hfi1/verbs_mcast.c index 49954b9..aa3f560 100644 --- a/drivers/staging/rdma/hfi1/verbs_mcast.c +++ b/drivers/staging/rdma/hfi1/verbs_mcast.c @@ -131,8 +131,8 @@ struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid) unsigned long flags; struct hfi1_mcast *mcast; - spin_lock_irqsave(&ibp->lock, flags); - n = ibp->mcast_tree.rb_node; + spin_lock_irqsave(&ibp->rvp.lock, flags); + n = ibp->rvp.mcast_tree.rb_node; while (n) { int ret; @@ -146,11 +146,11 @@ struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid) n = n->rb_right; else { atomic_inc(&mcast->refcount); - spin_unlock_irqrestore(&ibp->lock, flags); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); goto bail; } } - spin_unlock_irqrestore(&ibp->lock, flags); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); mcast = NULL; @@ -170,11 +170,11 @@ bail: static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp, struct hfi1_mcast *mcast, struct hfi1_mcast_qp *mqp) { - struct rb_node **n = &ibp->mcast_tree.rb_node; + struct rb_node **n = &ibp->rvp.mcast_tree.rb_node; struct rb_node *pn = NULL; int ret; - spin_lock_irq(&ibp->lock); + spin_lock_irq(&ibp->rvp.lock); while (*n) { struct hfi1_mcast *tmcast; @@ -229,12 +229,12 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp, atomic_inc(&mcast->refcount); rb_link_node(&mcast->rb_node, pn, n); - rb_insert_color(&mcast->rb_node, &ibp->mcast_tree); + rb_insert_color(&mcast->rb_node, &ibp->rvp.mcast_tree); ret = 0; bail: - spin_unlock_irq(&ibp->lock); + spin_unlock_irq(&ibp->rvp.lock); return ret; } @@ -313,13 +313,13 @@ int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto bail; } - spin_lock_irq(&ibp->lock); + spin_lock_irq(&ibp->rvp.lock); /* Find the GID in the mcast table. */ - n = ibp->mcast_tree.rb_node; + n = ibp->rvp.mcast_tree.rb_node; while (1) { if (n == NULL) { - spin_unlock_irq(&ibp->lock); + spin_unlock_irq(&ibp->rvp.lock); ret = -EINVAL; goto bail; } @@ -348,13 +348,13 @@ int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) /* If this was the last attached QP, remove the GID too. */ if (list_empty(&mcast->qp_list)) { - rb_erase(&mcast->rb_node, &ibp->mcast_tree); + rb_erase(&mcast->rb_node, &ibp->rvp.mcast_tree); last = 1; } break; } - spin_unlock_irq(&ibp->lock); + spin_unlock_irq(&ibp->rvp.lock); if (p) { /* @@ -381,5 +381,5 @@ bail: int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp) { - return ibp->mcast_tree.rb_node == NULL; + return !ibp->rvp.mcast_tree.rb_node; } -- cgit v0.10.2 From 92c24be1e8a1a9110428130271c7dc670fb1e0a2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:44 -0800 Subject: staging/rdma/hfi1: Remove mmap from hfi1 Mmap data structure has already been moved to rdmavt and hfi1 supports it. Now that the mmap functionality has also been moved to rdmavt its time for hfi1 to use that as well. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index d82d9dc..55077f3 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := chip.o cq.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ - init.o intr.o mad.o mmap.o pcie.o pio.o pio_copy.o \ + init.o intr.o mad.o pcie.o pio.o pio_copy.o \ qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs_mcast.o verbs.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/staging/rdma/hfi1/cq.c b/drivers/staging/rdma/hfi1/cq.c index ffd0e7a..25d1a2a 100644 --- a/drivers/staging/rdma/hfi1/cq.c +++ b/drivers/staging/rdma/hfi1/cq.c @@ -277,7 +277,7 @@ struct ib_cq *hfi1_create_cq( if (udata && udata->outlen >= sizeof(__u64)) { int err; - cq->ip = hfi1_create_mmap_info(dev, sz, context, wc); + cq->ip = rvt_create_mmap_info(&dev->rdi, sz, context, wc); if (!cq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wc; @@ -303,9 +303,9 @@ struct ib_cq *hfi1_create_cq( spin_unlock(&dev->n_cqs_lock); if (cq->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); + list_add(&cq->ip->pending_mmaps, &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } /* @@ -355,7 +355,7 @@ int hfi1_destroy_cq(struct ib_cq *ibcq) dev->n_cqs_allocated--; spin_unlock(&dev->n_cqs_lock); if (cq->ip) - kref_put(&cq->ip->ref, hfi1_release_mmap_info); + kref_put(&cq->ip->ref, rvt_release_mmap_info); else vfree(cq->queue); kfree(cq); @@ -481,7 +481,7 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) struct hfi1_ibdev *dev = to_idev(ibcq->device); struct rvt_mmap_info *ip = cq->ip; - hfi1_update_mmap_info(dev, ip, sz, wc); + rvt_update_mmap_info(&dev->rdi, ip, sz, wc); /* * Return the offset to mmap. @@ -494,10 +494,10 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) goto bail; } - spin_lock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + list_add(&ip->pending_mmaps, &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } ret = 0; diff --git a/drivers/staging/rdma/hfi1/mmap.c b/drivers/staging/rdma/hfi1/mmap.c deleted file mode 100644 index 4ce6be6..0000000 --- a/drivers/staging/rdma/hfi1/mmap.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Corporation. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include -#include -#include -#include -#include -#include - -#include "verbs.h" - -/** - * hfi1_release_mmap_info - free mmap info structure - * @ref: a pointer to the kref within struct rvt_mmap_info - */ -void hfi1_release_mmap_info(struct kref *ref) -{ - struct rvt_mmap_info *ip = - container_of(ref, struct rvt_mmap_info, ref); - struct hfi1_ibdev *dev = to_idev(ip->context->device); - - spin_lock_irq(&dev->pending_lock); - list_del(&ip->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - - vfree(ip->obj); - kfree(ip); -} - -/* - * open and close keep track of how many times the CQ is mapped, - * to avoid releasing it. - */ -static void hfi1_vma_open(struct vm_area_struct *vma) -{ - struct rvt_mmap_info *ip = vma->vm_private_data; - - kref_get(&ip->ref); -} - -static void hfi1_vma_close(struct vm_area_struct *vma) -{ - struct rvt_mmap_info *ip = vma->vm_private_data; - - kref_put(&ip->ref, hfi1_release_mmap_info); -} - -static struct vm_operations_struct hfi1_vm_ops = { - .open = hfi1_vma_open, - .close = hfi1_vma_close, -}; - -/** - * hfi1_mmap - create a new mmap region - * @context: the IB user context of the process making the mmap() call - * @vma: the VMA to be initialized - * Return zero if the mmap is OK. Otherwise, return an errno. - */ -int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - struct hfi1_ibdev *dev = to_idev(context->device); - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long size = vma->vm_end - vma->vm_start; - struct rvt_mmap_info *ip, *pp; - int ret = -EINVAL; - - /* - * Search the device's list of objects waiting for a mmap call. - * Normally, this list is very short since a call to create a - * CQ, QP, or SRQ is soon followed by a call to mmap(). 
- */ - spin_lock_irq(&dev->pending_lock); - list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, - pending_mmaps) { - /* Only the creator is allowed to mmap the object */ - if (context != ip->context || (__u64) offset != ip->offset) - continue; - /* Don't allow a mmap larger than the object. */ - if (size > ip->size) - break; - - list_del_init(&ip->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - - ret = remap_vmalloc_range(vma, ip->obj, 0); - if (ret) - goto done; - vma->vm_ops = &hfi1_vm_ops; - vma->vm_private_data = ip; - hfi1_vma_open(vma); - goto done; - } - spin_unlock_irq(&dev->pending_lock); -done: - return ret; -} - -/* - * Allocate information for hfi1_mmap - */ -struct rvt_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, - u32 size, - struct ib_ucontext *context, - void *obj) { - struct rvt_mmap_info *ip; - - ip = kmalloc(sizeof(*ip), GFP_KERNEL); - if (!ip) - goto bail; - - size = PAGE_ALIGN(size); - - spin_lock_irq(&dev->mmap_offset_lock); - if (dev->mmap_offset == 0) - dev->mmap_offset = PAGE_SIZE; - ip->offset = dev->mmap_offset; - dev->mmap_offset += size; - spin_unlock_irq(&dev->mmap_offset_lock); - - INIT_LIST_HEAD(&ip->pending_mmaps); - ip->size = size; - ip->context = context; - ip->obj = obj; - kref_init(&ip->ref); - -bail: - return ip; -} - -void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct rvt_mmap_info *ip, - u32 size, void *obj) -{ - size = PAGE_ALIGN(size); - - spin_lock_irq(&dev->mmap_offset_lock); - if (dev->mmap_offset == 0) - dev->mmap_offset = PAGE_SIZE; - ip->offset = dev->mmap_offset; - dev->mmap_offset += size; - spin_unlock_irq(&dev->mmap_offset_lock); - - ip->size = size; - ip->obj = obj; -} diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index a1dfb71..20b1a84 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -1195,7 +1195,7 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, } else { u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; - qp->ip = hfi1_create_mmap_info(dev, s, + qp->ip = rvt_create_mmap_info(&dev->rdi, s, ibpd->uobject->context, qp->r_rq.wq); if (!qp->ip) { @@ -1223,9 +1223,9 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, spin_unlock(&dev->n_qps_lock); if (qp->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } ret = &qp->ibqp; @@ -1256,7 +1256,7 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, bail_ip: if (qp->ip) - kref_put(&qp->ip->ref, hfi1_release_mmap_info); + kref_put(&qp->ip->ref, rvt_release_mmap_info); else vfree(qp->r_rq.wq); free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num); @@ -1316,7 +1316,7 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) spin_unlock(&dev->n_qps_lock); if (qp->ip) - kref_put(&qp->ip->ref, hfi1_release_mmap_info); + kref_put(&qp->ip->ref, rvt_release_mmap_info); else vfree(qp->r_rq.wq); vfree(qp->s_wq); diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/staging/rdma/hfi1/srq.c index 78f190a..c53b378 100644 --- a/drivers/staging/rdma/hfi1/srq.c +++ b/drivers/staging/rdma/hfi1/srq.c @@ -165,8 +165,8 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd, u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; srq->ip = - hfi1_create_mmap_info(dev, s, ibpd->uobject->context, - srq->rq.wq); + rvt_create_mmap_info(&dev->rdi, s, ibpd->uobject->context, + srq->rq.wq); if (!srq->ip) { ret = 
ERR_PTR(-ENOMEM); goto bail_wq; @@ -200,9 +200,9 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd, spin_unlock(&dev->n_srqs_lock); if (srq->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } ret = &srq->ibsrq; @@ -324,7 +324,7 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct hfi1_ibdev *dev = to_idev(srq->ibsrq.device); u32 s = sizeof(struct rvt_rwq) + size * sz; - hfi1_update_mmap_info(dev, ip, s, wq); + rvt_update_mmap_info(&dev->rdi, ip, s, wq); /* * Return the offset to mmap. @@ -341,11 +341,11 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, * Put user mapping info onto the pending list * unless it already is on the list. */ - spin_lock_irq(&dev->pending_lock); + spin_lock_irq(&dev->rdi.pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, - &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); + &dev->rdi.pending_mmaps); + spin_unlock_irq(&dev->rdi.pending_lock); } } else if (attr_mask & IB_SRQ_LIMIT) { spin_lock_irq(&srq->rq.lock); @@ -388,7 +388,7 @@ int hfi1_destroy_srq(struct ib_srq *ibsrq) dev->n_srqs_allocated--; spin_unlock(&dev->n_srqs_lock); if (srq->ip) - kref_put(&srq->ip->ref, hfi1_release_mmap_info); + kref_put(&srq->ip->ref, rvt_release_mmap_info); else vfree(srq->rq.wq); kfree(srq); diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 1c5e477..11f08ea 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1796,11 +1796,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; - INIT_LIST_HEAD(&dev->pending_mmaps); - spin_lock_init(&dev->pending_lock); seqlock_init(&dev->iowait_lock); - dev->mmap_offset = PAGE_SIZE; - spin_lock_init(&dev->mmap_offset_lock); INIT_LIST_HEAD(&dev->txwait); INIT_LIST_HEAD(&dev->memwait); @@ -1906,7 +1902,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->attach_mcast = hfi1_multicast_attach; ibdev->detach_mcast = hfi1_multicast_detach; ibdev->process_mad = hfi1_process_mad; - ibdev->mmap = hfi1_mmap; + ibdev->mmap = NULL; ibdev->dma_ops = NULL; ibdev->get_port_immutable = port_immutable; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 6a7ee46..eb12978 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -426,9 +426,6 @@ struct hfi1_ibport { struct hfi1_qp_ibdev; struct hfi1_ibdev { struct rvt_dev_info rdi; /* Must be first */ - struct list_head pending_mmaps; - spinlock_t mmap_offset_lock; /* protect mmap_offset */ - u32 mmap_offset; struct hfi1_qp_ibdev *qp_dev; @@ -441,9 +438,6 @@ struct hfi1_ibdev { struct kmem_cache *verbs_txreq_cache; struct timer_list mem_timer; - /* other waiters */ - spinlock_t pending_lock; - u64 n_piowait; u64 n_txwait; u64 n_kmem_wait; @@ -680,17 +674,6 @@ static inline void hfi1_put_ss(struct rvt_sge_state *ss) } } -void hfi1_release_mmap_info(struct kref *ref); - -struct rvt_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, u32 size, - struct ib_ucontext *context, - void *obj); - -void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct rvt_mmap_info *ip, - u32 size, void *obj); - -int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct 
*vma); - int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only); void hfi1_migrate_qp(struct rvt_qp *qp); -- cgit v0.10.2 From d604e1d2cd0e5225e1b6132f27734137b8e0d63e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:42:50 -0800 Subject: staging/rdma/hfi1: Use rdmavt pkey verbs function No need to keep providing the query pkey function. This is now being done in rdmavt. Remove support from hfi1. The allocation and maintenance of the list still resides in the driver. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 11f08ea..fa5b9c1 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1679,24 +1679,6 @@ unsigned hfi1_get_npkeys(struct hfi1_devdata *dd) return ARRAY_SIZE(dd->pport[0].pkeys); } -static int query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - struct hfi1_devdata *dd = dd_from_ibdev(ibdev); - int ret; - - if (index >= hfi1_get_npkeys(dd)) { - ret = -EINVAL; - goto bail; - } - - *pkey = hfi1_get_pkey(to_iport(ibdev, port), index); - ret = 0; - -bail: - return ret; -} - /** * alloc_ucontext - allocate a ucontest * @ibdev: the infiniband device @@ -1864,7 +1846,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_device = modify_device; ibdev->query_port = query_port; ibdev->modify_port = modify_port; - ibdev->query_pkey = query_pkey; + ibdev->query_pkey = NULL; ibdev->query_gid = query_gid; ibdev->alloc_ucontext = alloc_ucontext; ibdev->dealloc_ucontext = dealloc_ucontext; -- cgit v0.10.2 From 90963ad735efd191d9e31c0720238406afd89e19 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Tue, 19 Jan 2016 14:42:55 -0800 Subject: staging/rdma/hfi1: Remove user context allocation and de-alloction functions IB user context alloc and dealloc functions have been added to rdmavt. This patch removes them from hfi1. 
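The driver-side change is simply to stop overriding the verbs hooks, as the verbs.c hunk below shows. A condensed before/after, with the rdmavt side stated as an assumption since its registration path is not part of this patch:

	/* Before: hfi1 carried a trivial wrapper around ib_ucontext. */
	ibdev->alloc_ucontext = alloc_ucontext;
	ibdev->dealloc_ucontext = dealloc_ucontext;

	/* After: leave the hooks NULL.  The assumption here is that
	 * rdmavt installs its own ucontext alloc/dealloc handlers at
	 * rvt_register_device() time when the driver does not provide
	 * them; that code lives in rdmavt, not in this patch. */
	ibdev->alloc_ucontext = NULL;
	ibdev->dealloc_ucontext = NULL;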
Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index fa5b9c1..301716a 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -152,16 +152,6 @@ const int ib_hfi1_state_ops[IB_QPS_ERR + 1] = { HFI1_POST_SEND_OK | HFI1_FLUSH_SEND, }; -struct hfi1_ucontext { - struct ib_ucontext ibucontext; -}; - -static inline struct hfi1_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct hfi1_ucontext, ibucontext); -} - static inline void _hfi1_schedule_send(struct rvt_qp *qp); /* @@ -1679,36 +1669,6 @@ unsigned hfi1_get_npkeys(struct hfi1_devdata *dd) return ARRAY_SIZE(dd->pport[0].pkeys); } -/** - * alloc_ucontext - allocate a ucontest - * @ibdev: the infiniband device - * @udata: not used by the driver - */ - -static struct ib_ucontext *alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) -{ - struct hfi1_ucontext *context; - struct ib_ucontext *ret; - - context = kmalloc(sizeof(*context), GFP_KERNEL); - if (!context) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - ret = &context->ibucontext; - -bail: - return ret; -} - -static int dealloc_ucontext(struct ib_ucontext *context) -{ - kfree(to_iucontext(context)); - return 0; -} - static void init_ibport(struct hfi1_pportdata *ppd) { struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -1848,8 +1808,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_port = modify_port; ibdev->query_pkey = NULL; ibdev->query_gid = query_gid; - ibdev->alloc_ucontext = alloc_ucontext; - ibdev->dealloc_ucontext = dealloc_ucontext; + ibdev->alloc_ucontext = NULL; + ibdev->dealloc_ucontext = NULL; ibdev->alloc_pd = NULL; ibdev->dealloc_pd = NULL; ibdev->create_ah = NULL; -- cgit v0.10.2 From 54d10c1eb1dc381e62361213bbd100a433b733c9 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:01 -0800 Subject: staging/rdma/hfi1: Use rdmavt send flags and recv flags Use the definitions of the s_flags and r_flags which are now in rdmavt. 
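The rename is almost entirely mechanical (HFI1_S_* to RVT_S_*, HFI1_R_* to RVT_R_*); the one flag whose name actually changes is HFI1_R_RSP_DEFERED_ACK, which becomes RVT_R_RSP_NAK. As a concrete example of the converted code, the receive-side relay loop from the driver.c hunk below reads, condensed (locking and the state-machine check around the send case are elided here):

	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
		list_del_init(&qp->rspwait);
		if (qp->r_flags & RVT_R_RSP_NAK) {
			/* was HFI1_R_RSP_DEFERED_ACK: send the deferred ack */
			qp->r_flags &= ~RVT_R_RSP_NAK;
			hfi1_send_rc_ack(rcd, qp, 0);
		}
		if (qp->r_flags & RVT_R_RSP_SEND) {
			/* was HFI1_R_RSP_SEND: hand the QP back to the send engine */
			qp->r_flags &= ~RVT_R_RSP_SEND;
			hfi1_schedule_send(qp);
		}
	}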
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index da55e39..ec2286a 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -781,14 +781,14 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) */ list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) { list_del_init(&qp->rspwait); - if (qp->r_flags & HFI1_R_RSP_DEFERED_ACK) { - qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK; + if (qp->r_flags & RVT_R_RSP_NAK) { + qp->r_flags &= ~RVT_R_RSP_NAK; hfi1_send_rc_ack(rcd, qp, 0); } - if (qp->r_flags & HFI1_R_RSP_SEND) { + if (qp->r_flags & RVT_R_RSP_SEND) { unsigned long flags; - qp->r_flags &= ~HFI1_R_RSP_SEND; + qp->r_flags &= ~RVT_R_RSP_SEND; spin_lock_irqsave(&qp->s_lock, flags); if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND) diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 8ee7ed8..be0dcc3 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -1564,7 +1564,7 @@ full: write_sequnlock_irqrestore(&dev->iowait_lock, flags); for (i = 0; i < n; i++) - hfi1_qp_wakeup(qps[i], HFI1_S_WAIT_PIO); + hfi1_qp_wakeup(qps[i], RVT_S_WAIT_PIO); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 20b1a84..d5620ba 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -360,7 +360,7 @@ static void reset_qp(struct rvt_qp *qp, enum ib_qp_type type) hfi1_do_send, iowait_sleep, iowait_wakeup); - qp->s_flags &= HFI1_S_SIGNAL_REQ_WR; + qp->s_flags &= RVT_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; qp->s_draining = 0; @@ -407,7 +407,7 @@ static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; - if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags)) + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) hfi1_put_ss(&qp->s_rdma_read_sge); hfi1_put_ss(&qp->r_sge); @@ -471,24 +471,24 @@ int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err) qp->state = IB_QPS_ERR; - if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) { - qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR); + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); } - if (qp->s_flags & HFI1_S_ANY_WAIT_SEND) - qp->s_flags &= ~HFI1_S_ANY_WAIT_SEND; + if (qp->s_flags & RVT_S_ANY_WAIT_SEND) + qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; write_seqlock(&dev->iowait_lock); - if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) { - qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; + if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { + qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } write_sequnlock(&dev->iowait_lock); - if (!(qp->s_flags & HFI1_S_BUSY)) { + if (!(qp->s_flags & RVT_S_BUSY)) { qp->s_hdrwords = 0; if (qp->s_rdma_mr) { rvt_put_mr(qp->s_rdma_mr); @@ -507,7 +507,7 @@ int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err) wc.qp = &qp->ibqp; wc.opcode = IB_WC_RECV; - if (test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) { + if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; wc.status = err; hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); @@ -742,7 +742,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, 
struct ib_qp_attr *attr, if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; flush_iowait(qp); - qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); /* Stop the sending work queue and retry timer */ @@ -762,7 +762,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_RTR: /* Allow event to re-trigger if QP set to RTR more than once */ - qp->r_flags &= ~HFI1_R_COMM_EST; + qp->r_flags &= ~RVT_R_COMM_EST; qp->state = new_state; break; @@ -828,7 +828,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_ah_attr = qp->alt_ah_attr; qp->port_num = qp->alt_ah_attr.port_num; qp->s_pkey_index = qp->s_alt_pkey_index; - qp->s_flags |= HFI1_S_AHG_CLEAR; + qp->s_flags |= RVT_S_AHG_CLEAR; priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); } @@ -954,7 +954,7 @@ int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - if (qp->s_flags & HFI1_S_SIGNAL_REQ_WR) + if (qp->s_flags & RVT_S_SIGNAL_REQ_WR) init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; else init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; @@ -1154,7 +1154,7 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = HFI1_S_SIGNAL_REQ_WR; + qp->s_flags = RVT_S_SIGNAL_REQ_WR; dev = to_idev(ibpd->device); dd = dd_from_dev(dev); err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type, @@ -1292,7 +1292,7 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; flush_iowait(qp); - qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); cancel_work_sync(&priv->s_iowait.iowork); @@ -1398,20 +1398,20 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth) * honor the credit field. 
*/ if (credit == HFI1_AETH_CREDIT_INVAL) { - if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) { - qp->s_flags |= HFI1_S_UNLIMITED_CREDIT; - if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) { - qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT; + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) { + qp->s_flags |= RVT_S_UNLIMITED_CREDIT; + if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT; hfi1_schedule_send(qp); } } - } else if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) { + } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) { /* Compute new LSN (i.e., MSN + credit) */ credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK; if (cmp_msn(credit, qp->s_lsn) > 0) { qp->s_lsn = credit; - if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) { - qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT; + if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT; hfi1_schedule_send(qp); } } @@ -1469,13 +1469,13 @@ static int iowait_sleep( to_iport(qp->ibqp.device, qp->port_num); ibp->rvp.n_dmawait++; - qp->s_flags |= HFI1_S_WAIT_DMA_DESC; + qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); - trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC); + trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); atomic_inc(&qp->refcount); } write_sequnlock(&dev->iowait_lock); - qp->s_flags &= ~HFI1_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EBUSY; } else { @@ -1495,7 +1495,7 @@ static void iowait_wakeup(struct iowait *wait, int reason) struct rvt_qp *qp = iowait_to_qp(wait); WARN_ON(reason != SDMA_AVAIL_REASON); - hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC); + hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC); } int hfi1_qp_init(struct hfi1_ibdev *dev) @@ -1712,7 +1712,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) void qp_comm_est(struct rvt_qp *qp) { - qp->r_flags |= HFI1_R_COMM_EST; + qp->r_flags |= RVT_R_COMM_EST; if (qp->ibqp.event_handler) { struct ib_event ev; @@ -1736,7 +1736,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->remote_ah_attr = qp->alt_ah_attr; qp->port_num = qp->alt_ah_attr.port_num; qp->s_pkey_index = qp->s_alt_pkey_index; - qp->s_flags |= HFI1_S_AHG_CLEAR; + qp->s_flags |= RVT_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 8e66562..9efa4bc 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -125,7 +125,7 @@ static inline void clear_ahg(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; priv->s_hdr->ahgcount = 0; - qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR); + qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); if (priv->s_sde && qp->s_ahgidx >= 0) sdma_ahg_free(priv->s_sde, qp->s_ahgidx); qp->s_ahgidx = -1; diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index d7334f4..bd504de 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -76,7 +76,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, static void start_timer(struct rvt_qp *qp) { - qp->s_flags |= HFI1_S_TIMER; + qp->s_flags |= RVT_S_TIMER; qp->s_timer.function = rc_timeout; /* 4.096 usec. * (1 << qp->timeout) */ qp->s_timer.expires = jiffies + qp->timeout_jiffies; @@ -133,7 +133,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(ACKNOWLEDGE): /* Check for no next entry in the queue. 
*/ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { - if (qp->s_flags & HFI1_S_ACK_PENDING) + if (qp->s_flags & RVT_S_ACK_PENDING) goto normal; goto bail; } @@ -218,7 +218,7 @@ normal: * (see above). */ qp->s_ack_state = OP(SEND_ONLY); - qp->s_flags &= ~HFI1_S_ACK_PENDING; + qp->s_flags &= ~RVT_S_ACK_PENDING; qp->s_cur_sge = NULL; if (qp->s_nak_state) ohdr->u.aeth = @@ -242,12 +242,12 @@ bail: qp->s_ack_state = OP(ACKNOWLEDGE); /* * Ensure s_rdma_ack_cnt changes are committed prior to resetting - * HFI1_S_RESP_PENDING + * RVT_S_RESP_PENDING */ smp_wmb(); - qp->s_flags &= ~(HFI1_S_RESP_PENDING - | HFI1_S_ACK_PENDING - | HFI1_S_AHG_VALID); + qp->s_flags &= ~(RVT_S_RESP_PENDING + | RVT_S_ACK_PENDING + | RVT_S_AHG_VALID); return 0; } @@ -287,7 +287,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); /* Sending responses has higher priority over sending requests. */ - if ((qp->s_flags & HFI1_S_RESP_PENDING) && + if ((qp->s_flags & RVT_S_RESP_PENDING) && make_rc_ack(dev, qp, ohdr, pmtu)) goto done; @@ -299,7 +299,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { - qp->s_flags |= HFI1_S_WAIT_DMA; + qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } clear_ahg(qp); @@ -310,12 +310,12 @@ int hfi1_make_rc_req(struct rvt_qp *qp) goto done; } - if (qp->s_flags & (HFI1_S_WAIT_RNR | HFI1_S_WAIT_ACK)) + if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK)) goto bail; if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) { if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) { - qp->s_flags |= HFI1_S_WAIT_PSN; + qp->s_flags |= RVT_S_WAIT_PSN; goto bail; } qp->s_sending_psn = qp->s_psn; @@ -348,7 +348,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) */ if ((wqe->wr.send_flags & IB_SEND_FENCE) && qp->s_num_rd_atomic) { - qp->s_flags |= HFI1_S_WAIT_FENCE; + qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } wqe->psn = qp->s_next_psn; @@ -366,9 +366,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp) case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) && + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT; + qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } wqe->lpsn = wqe->psn; @@ -394,14 +394,14 @@ int hfi1_make_rc_req(struct rvt_qp *qp) break; case IB_WR_RDMA_WRITE: - if (newreq && !(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) + if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; /* FALLTHROUGH */ case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. 
*/ - if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) && + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT; + qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } ohdr->u.rc.reth.vaddr = @@ -441,11 +441,11 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (newreq) { if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) { - qp->s_flags |= HFI1_S_WAIT_RDMAR; + qp->s_flags |= RVT_S_WAIT_RDMAR; goto bail; } qp->s_num_rd_atomic++; - if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; /* * Adjust s_next_psn to count the @@ -478,11 +478,11 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (newreq) { if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) { - qp->s_flags |= HFI1_S_WAIT_RDMAR; + qp->s_flags |= RVT_S_WAIT_RDMAR; goto bail; } qp->s_num_rd_atomic++; - if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; wqe->lpsn = wqe->psn; } @@ -649,9 +649,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp) delta = delta_psn(bth2, wqe->psn); if (delta && delta % HFI1_PSN_CREDIT == 0) bth2 |= IB_BTH_REQ_ACK; - if (qp->s_flags & HFI1_S_SEND_ONE) { - qp->s_flags &= ~HFI1_S_SEND_ONE; - qp->s_flags |= HFI1_S_WAIT_ACK; + if (qp->s_flags & RVT_S_SEND_ONE) { + qp->s_flags &= ~RVT_S_SEND_ONE; + qp->s_flags |= RVT_S_WAIT_ACK; bth2 |= IB_BTH_REQ_ACK; } qp->s_len -= len; @@ -669,7 +669,7 @@ done: goto unlock; bail: - qp->s_flags &= ~HFI1_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; @@ -701,7 +701,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, unsigned long flags; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ - if (qp->s_flags & HFI1_S_RESP_PENDING) + if (qp->s_flags & RVT_S_RESP_PENDING) goto queue_ack; /* Ensure s_rdma_ack_cnt changes are committed */ @@ -774,11 +774,11 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, queue_ack: this_cpu_inc(*ibp->rvp.rc_qacks); spin_lock_irqsave(&qp->s_lock, flags); - qp->s_flags |= HFI1_S_ACK_PENDING | HFI1_S_RESP_PENDING; + qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; if (is_fecn) - qp->s_flags |= HFI1_S_ECN; + qp->s_flags |= RVT_S_ECN; /* Schedule the send tasklet. */ hfi1_schedule_send(qp); @@ -866,14 +866,14 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) done: qp->s_psn = psn; /* - * Set HFI1_S_WAIT_PSN as rc_complete() may start the timer + * Set RVT_S_WAIT_PSN as rc_complete() may start the timer * asynchronously before the send tasklet can get scheduled. * Doing it in hfi1_make_rc_req() is too late. 
*/ if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) - qp->s_flags |= HFI1_S_WAIT_PSN; - qp->s_flags &= ~HFI1_S_AHG_VALID; + qp->s_flags |= RVT_S_WAIT_PSN; + qp->s_flags &= ~RVT_S_AHG_VALID; } /* @@ -904,11 +904,11 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) else ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn); - qp->s_flags &= ~(HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR | - HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_PSN | - HFI1_S_WAIT_ACK); + qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | + RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN | + RVT_S_WAIT_ACK); if (wait) - qp->s_flags |= HFI1_S_SEND_ONE; + qp->s_flags |= RVT_S_SEND_ONE; reset_psn(qp, psn); } @@ -923,10 +923,10 @@ static void rc_timeout(unsigned long arg) spin_lock_irqsave(&qp->r_lock, flags); spin_lock(&qp->s_lock); - if (qp->s_flags & HFI1_S_TIMER) { + if (qp->s_flags & RVT_S_TIMER) { ibp = to_iport(qp->ibqp.device, qp->port_num); ibp->rvp.n_rc_timeouts++; - qp->s_flags &= ~HFI1_S_TIMER; + qp->s_flags &= ~RVT_S_TIMER; del_timer(&qp->s_timer); trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1); restart_rc(qp, qp->s_last_psn + 1, 1); @@ -945,8 +945,8 @@ void hfi1_rc_rnr_retry(unsigned long arg) unsigned long flags; spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_flags & HFI1_S_WAIT_RNR) { - qp->s_flags &= ~HFI1_S_WAIT_RNR; + if (qp->s_flags & RVT_S_WAIT_RNR) { + qp->s_flags &= ~RVT_S_WAIT_RNR; del_timer(&qp->s_timer); hfi1_schedule_send(qp); } @@ -1017,7 +1017,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) */ if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && !(qp->s_flags & - (HFI1_S_TIMER | HFI1_S_WAIT_RNR | HFI1_S_WAIT_PSN)) && + (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) && (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) start_timer(qp); @@ -1032,7 +1032,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) rvt_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || + if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr.wr_id; @@ -1050,9 +1050,9 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) * and they are now complete, restart sending. */ trace_hfi1_rc_sendcomplete(qp, psn); - if (qp->s_flags & HFI1_S_WAIT_PSN && + if (qp->s_flags & RVT_S_WAIT_PSN && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { - qp->s_flags &= ~HFI1_S_WAIT_PSN; + qp->s_flags &= ~RVT_S_WAIT_PSN; qp->s_sending_psn = qp->s_psn; qp->s_sending_hpsn = qp->s_psn - 1; hfi1_schedule_send(qp); @@ -1089,7 +1089,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, rvt_put_mr(sge->mr); } /* Post a send completion queue entry if requested. 
*/ - if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || + if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr.wr_id; @@ -1169,8 +1169,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, int diff; /* Remove QP from retry timer */ - if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) { - qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR); + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); } @@ -1218,11 +1218,11 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { /* Retry this request. */ - if (!(qp->r_flags & HFI1_R_RDMAR_SEQ)) { - qp->r_flags |= HFI1_R_RDMAR_SEQ; + if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) { + qp->r_flags |= RVT_R_RDMAR_SEQ; restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { - qp->r_flags |= HFI1_R_RSP_SEND; + qp->r_flags |= RVT_R_RSP_SEND; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); @@ -1245,14 +1245,14 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { qp->s_num_rd_atomic--; /* Restart sending task if fence is complete */ - if ((qp->s_flags & HFI1_S_WAIT_FENCE) && + if ((qp->s_flags & RVT_S_WAIT_FENCE) && !qp->s_num_rd_atomic) { - qp->s_flags &= ~(HFI1_S_WAIT_FENCE | - HFI1_S_WAIT_ACK); + qp->s_flags &= ~(RVT_S_WAIT_FENCE | + RVT_S_WAIT_ACK); hfi1_schedule_send(qp); - } else if (qp->s_flags & HFI1_S_WAIT_RDMAR) { - qp->s_flags &= ~(HFI1_S_WAIT_RDMAR | - HFI1_S_WAIT_ACK); + } else if (qp->s_flags & RVT_S_WAIT_RDMAR) { + qp->s_flags &= ~(RVT_S_WAIT_RDMAR | + RVT_S_WAIT_ACK); hfi1_schedule_send(qp); } } @@ -1280,8 +1280,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, qp->s_state = OP(SEND_LAST); qp->s_psn = psn + 1; } - if (qp->s_flags & HFI1_S_WAIT_ACK) { - qp->s_flags &= ~HFI1_S_WAIT_ACK; + if (qp->s_flags & RVT_S_WAIT_ACK) { + qp->s_flags &= ~RVT_S_WAIT_ACK; hfi1_schedule_send(qp); } hfi1_get_credit(qp, aeth); @@ -1295,7 +1295,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, ibp->rvp.n_rnr_naks++; if (qp->s_acked == qp->s_tail) goto bail; - if (qp->s_flags & HFI1_S_WAIT_RNR) + if (qp->s_flags & RVT_S_WAIT_RNR) goto bail; if (qp->s_rnr_retry == 0) { status = IB_WC_RNR_RETRY_EXC_ERR; @@ -1311,8 +1311,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, reset_psn(qp, psn); - qp->s_flags &= ~(HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_ACK); - qp->s_flags |= HFI1_S_WAIT_RNR; + qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK); + qp->s_flags |= RVT_S_WAIT_RNR; qp->s_timer.function = hfi1_rc_rnr_retry; qp->s_timer.expires = jiffies + usecs_to_jiffies( ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) & @@ -1387,8 +1387,8 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, struct rvt_swqe *wqe; /* Remove QP from retry timer */ - if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) { - qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR); + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); } @@ -1403,10 +1403,10 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, } ibp->rvp.n_rdma_seq++; - qp->r_flags |= HFI1_R_RDMAR_SEQ; + qp->r_flags |= RVT_R_RDMAR_SEQ; 
restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { - qp->r_flags |= HFI1_R_RSP_SEND; + qp->r_flags |= RVT_R_RSP_SEND; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1466,10 +1466,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, * Skip everything other than the PSN we expect, if we are waiting * for a reply to a restarted RDMA read or atomic op. */ - if (qp->r_flags & HFI1_R_RDMAR_SEQ) { + if (qp->r_flags & RVT_R_RDMAR_SEQ) { if (cmp_psn(psn, qp->s_last_psn + 1) != 0) goto ack_done; - qp->r_flags &= ~HFI1_R_RDMAR_SEQ; + qp->r_flags &= ~RVT_R_RDMAR_SEQ; } if (unlikely(qp->s_acked == qp->s_tail)) @@ -1520,10 +1520,10 @@ read_middle: * We got a response so update the timeout. * 4.096 usec. * (1 << qp->timeout) */ - qp->s_flags |= HFI1_S_TIMER; + qp->s_flags |= RVT_S_TIMER; mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); - if (qp->s_flags & HFI1_S_WAIT_ACK) { - qp->s_flags &= ~HFI1_S_WAIT_ACK; + if (qp->s_flags & RVT_S_WAIT_ACK) { + qp->s_flags &= ~RVT_S_WAIT_ACK; hfi1_schedule_send(qp); } @@ -1613,7 +1613,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp) { if (list_empty(&qp->rspwait)) { - qp->r_flags |= HFI1_R_RSP_DEFERED_ACK; + qp->r_flags |= RVT_R_RSP_NAK; atomic_inc(&qp->refcount); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1627,7 +1627,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) if (list_empty(&qp->rspwait)) return; list_del_init(&qp->rspwait); - qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK; + qp->r_flags &= ~RVT_R_RSP_NAK; if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } @@ -1813,7 +1813,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, break; } qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_flags |= HFI1_S_RESP_PENDING; + qp->s_flags |= RVT_S_RESP_PENDING; qp->r_nak_state = 0; hfi1_schedule_send(qp); @@ -2057,7 +2057,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) break; } - if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST)) + if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) qp_comm_est(qp); /* OK, process the packet. */ @@ -2127,7 +2127,7 @@ send_last: hfi1_copy_sge(&qp->r_sge, data, tlen, 1); hfi1_put_ss(&qp->r_sge); qp->r_msn++; - if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -2264,7 +2264,7 @@ send_last: qp->r_head_ack_queue = next; /* Schedule the send tasklet. */ - qp->s_flags |= HFI1_S_RESP_PENDING; + qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); @@ -2331,7 +2331,7 @@ send_last: qp->r_head_ack_queue = next; /* Schedule the send tasklet. */ - qp->s_flags |= HFI1_S_RESP_PENDING; + qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 98a4798..0b324b1 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -208,7 +208,7 @@ int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only) qp->r_wr_id = wqe->wr_id; ret = 1; - set_bit(HFI1_R_WRID_VALID, &qp->r_aflags); + set_bit(RVT_R_WRID_VALID, &qp->r_aflags); if (handler) { u32 n; @@ -382,11 +382,11 @@ static void ruc_loopback(struct rvt_qp *sqp) spin_lock_irqsave(&sqp->s_lock, flags); /* Return if we are already busy processing a work request. 
*/ - if ((sqp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT)) || + if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || !(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_OR_FLUSH_SEND)) goto unlock; - sqp->s_flags |= HFI1_S_BUSY; + sqp->s_flags |= RVT_S_BUSY; again: if (sqp->s_last == sqp->s_head) @@ -550,7 +550,7 @@ again: if (release) hfi1_put_ss(&qp->r_sge); - if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) @@ -595,7 +595,7 @@ rnr_nak: spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_RECV_OK)) goto clr_busy; - sqp->s_flags |= HFI1_S_WAIT_RNR; + sqp->s_flags |= RVT_S_WAIT_RNR; sqp->s_timer.function = hfi1_rc_rnr_retry; sqp->s_timer.expires = jiffies + usecs_to_jiffies(ib_hfi1_rnr_table[qp->r_min_rnr_timer]); @@ -625,7 +625,7 @@ serr: if (sqp->ibqp.qp_type == IB_QPT_RC) { int lastwqe = hfi1_error_qp(sqp, IB_WC_WR_FLUSH_ERR); - sqp->s_flags &= ~HFI1_S_BUSY; + sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); if (lastwqe) { struct ib_event ev; @@ -638,7 +638,7 @@ serr: goto done; } clr_busy: - sqp->s_flags &= ~HFI1_S_BUSY; + sqp->s_flags &= ~RVT_S_BUSY; unlock: spin_unlock_irqrestore(&sqp->s_lock, flags); done: @@ -694,9 +694,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, static inline void build_ahg(struct rvt_qp *qp, u32 npsn) { struct hfi1_qp_priv *priv = qp->priv; - if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR)) + if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR)) clear_ahg(qp); - if (!(qp->s_flags & HFI1_S_AHG_VALID)) { + if (!(qp->s_flags & RVT_S_AHG_VALID)) { /* first middle that needs copy */ if (qp->s_ahgidx < 0) qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); @@ -706,7 +706,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) /* save to protect a change in another thread */ priv->s_hdr->sde = priv->s_sde; priv->s_hdr->ahgidx = qp->s_ahgidx; - qp->s_flags |= HFI1_S_AHG_VALID; + qp->s_flags |= RVT_S_AHG_VALID; } } else { /* subsequent middle after valid */ @@ -779,7 +779,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~HFI1_S_AHG_VALID; + qp->s_flags &= ~RVT_S_AHG_VALID; priv->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0); priv->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); priv->s_hdr->ibh.lrh[2] = @@ -790,8 +790,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); bth1 = qp->remote_qpn; - if (qp->s_flags & HFI1_S_ECN) { - qp->s_flags &= ~HFI1_S_ECN; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; /* we recently received a FECN, so return a BECN */ bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT); } @@ -847,7 +847,7 @@ void hfi1_do_send(struct work_struct *work) return; } - qp->s_flags |= HFI1_S_BUSY; + qp->s_flags |= RVT_S_BUSY; spin_unlock_irqrestore(&qp->s_lock, flags); @@ -897,7 +897,7 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); /* See ch. 
11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || + if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED) || status != IB_WC_SUCCESS) { struct ib_wc wc; diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index cac3724..0935182 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -84,7 +84,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { - qp->s_flags |= HFI1_S_WAIT_DMA; + qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } clear_ahg(qp); @@ -241,7 +241,7 @@ done: goto unlock; bail: - qp->s_flags &= ~HFI1_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; @@ -332,7 +332,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) inv: if (qp->r_state == OP(SEND_FIRST) || qp->r_state == OP(SEND_MIDDLE)) { - set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags); + set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else hfi1_put_ss(&qp->r_sge); @@ -382,7 +382,7 @@ inv: goto inv; } - if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST)) + if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) qp_comm_est(qp); /* OK, process the packet. */ @@ -391,7 +391,7 @@ inv: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): send_first: - if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags)) + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) qp->r_sge = qp->s_rdma_read_sge; else { ret = hfi1_get_rwqe(qp, 0); @@ -536,7 +536,7 @@ rdma_last_imm: tlen -= (hdrsize + pad + 4); if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; - if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags)) + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) hfi1_put_ss(&qp->s_rdma_read_sge); else { ret = hfi1_get_rwqe(qp, 1); @@ -576,7 +576,7 @@ rdma_last: return; rewind: - set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags); + set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; drop: ibp->rvp.n_pkt_drops++; diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index e058fd2..a0e6222 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -161,8 +161,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) /* * Get the next work request entry to find where to put the data. */ - if (qp->r_flags & HFI1_R_REUSE_SGE) - qp->r_flags &= ~HFI1_R_REUSE_SGE; + if (qp->r_flags & RVT_R_REUSE_SGE) + qp->r_flags &= ~RVT_R_REUSE_SGE; else { int ret; @@ -179,7 +179,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } /* Silently drop packets which are too big. */ if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_flags |= HFI1_R_REUSE_SGE; + qp->r_flags |= RVT_R_REUSE_SGE; ibp->rvp.n_pkt_drops++; goto bail_unlock; } @@ -223,7 +223,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) length -= len; } hfi1_put_ss(&qp->r_sge); - if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -290,7 +290,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) goto bail; /* If DMAs are in progress, we can't flush immediately. 
*/ if (atomic_read(&priv->s_iowait.sdma_busy)) { - qp->s_flags |= HFI1_S_WAIT_DMA; + qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } wqe = get_swqe_ptr(qp, qp->s_last); @@ -324,7 +324,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) * zero length descriptor so we get a callback. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { - qp->s_flags |= HFI1_S_WAIT_DMA; + qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } qp->s_cur = next_cur; @@ -426,7 +426,7 @@ done: goto unlock; bail: - qp->s_flags &= ~HFI1_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; @@ -812,8 +812,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) /* * Get the next work request entry to find where to put the data. */ - if (qp->r_flags & HFI1_R_REUSE_SGE) - qp->r_flags &= ~HFI1_R_REUSE_SGE; + if (qp->r_flags & RVT_R_REUSE_SGE) + qp->r_flags &= ~RVT_R_REUSE_SGE; else { int ret; @@ -830,7 +830,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } /* Silently drop packets which are too big. */ if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_flags |= HFI1_R_REUSE_SGE; + qp->r_flags |= RVT_R_REUSE_SGE; goto drop; } if (has_grh) { @@ -841,7 +841,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); hfi1_put_ss(&qp->r_sge); - if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 301716a..a1e9f0b 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -702,7 +702,7 @@ static void mem_timer(unsigned long data) write_sequnlock_irqrestore(&dev->iowait_lock, flags); if (qp) - hfi1_qp_wakeup(qp, HFI1_S_WAIT_KMEM); + hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM); } void update_sge(struct rvt_sge_state *ss, u32 length) @@ -740,12 +740,12 @@ static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK && list_empty(&priv->s_iowait.list)) { dev->n_txwait++; - qp->s_flags |= HFI1_S_WAIT_TX; + qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); - trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TX); + trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); atomic_inc(&qp->refcount); } - qp->s_flags &= ~HFI1_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; write_sequnlock(&dev->iowait_lock); spin_unlock_irqrestore(&qp->s_lock, flags); tx = ERR_PTR(-EBUSY); @@ -803,7 +803,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx) list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ write_sequnlock_irqrestore(&dev->iowait_lock, flags); - hfi1_qp_wakeup(qp, HFI1_S_WAIT_TX); + hfi1_qp_wakeup(qp, RVT_S_WAIT_TX); break; } } while (read_seqretry(&dev->iowait_lock, seq)); @@ -838,8 +838,8 @@ static void verbs_sdma_complete( * do the flush work until that QP's * sdma work has finished. 
*/ - if (qp->s_flags & HFI1_S_WAIT_DMA) { - qp->s_flags &= ~HFI1_S_WAIT_DMA; + if (qp->s_flags & RVT_S_WAIT_DMA) { + qp->s_flags &= ~RVT_S_WAIT_DMA; hfi1_schedule_send(qp); } } @@ -860,13 +860,13 @@ static int wait_kmem(struct hfi1_ibdev *dev, struct rvt_qp *qp) if (list_empty(&priv->s_iowait.list)) { if (list_empty(&dev->memwait)) mod_timer(&dev->mem_timer, jiffies + 1); - qp->s_flags |= HFI1_S_WAIT_KMEM; + qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); - trace_hfi1_qpsleep(qp, HFI1_S_WAIT_KMEM); + trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); atomic_inc(&qp->refcount); } write_sequnlock(&dev->iowait_lock); - qp->s_flags &= ~HFI1_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; ret = -EBUSY; } spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1092,17 +1092,17 @@ static int no_bufs_available(struct rvt_qp *qp, struct send_context *sc) int was_empty; dev->n_piowait++; - qp->s_flags |= HFI1_S_WAIT_PIO; + qp->s_flags |= RVT_S_WAIT_PIO; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); - trace_hfi1_qpsleep(qp, HFI1_S_WAIT_PIO); + trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); atomic_inc(&qp->refcount); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); } write_sequnlock(&dev->iowait_lock); - qp->s_flags &= ~HFI1_S_BUSY; + qp->s_flags &= ~RVT_S_BUSY; ret = -EBUSY; } spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1307,7 +1307,7 @@ bad: * @ps: the state of the packet to send * * Return zero if packet is sent or queued OK. - * Return non-zero and clear qp->s_flags HFI1_S_BUSY otherwise. + * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. */ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index eb12978..b9843a5 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -63,6 +63,7 @@ #include #include #include +#include struct hfi1_ctxtdata; struct hfi1_pportdata; @@ -286,84 +287,6 @@ struct hfi1_pkt_state { struct hfi1_pportdata *ppd; }; -/* - * Atomic bit definitions for r_aflags. - */ -#define HFI1_R_WRID_VALID 0 -#define HFI1_R_REWIND_SGE 1 - -/* - * Bit definitions for r_flags. - */ -#define HFI1_R_REUSE_SGE 0x01 -#define HFI1_R_RDMAR_SEQ 0x02 -/* defer ack until end of interrupt session */ -#define HFI1_R_RSP_DEFERED_ACK 0x04 -/* relay ack to send engine */ -#define HFI1_R_RSP_SEND 0x08 -#define HFI1_R_COMM_EST 0x10 - -/* - * Bit definitions for s_flags. 
- * - * HFI1_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled - * HFI1_S_BUSY - send tasklet is processing the QP - * HFI1_S_TIMER - the RC retry timer is active - * HFI1_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics - * HFI1_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs - * before processing the next SWQE - * HFI1_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete - * before processing the next SWQE - * HFI1_S_WAIT_RNR - waiting for RNR timeout - * HFI1_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE - * HFI1_S_WAIT_DMA - waiting for send DMA queue to drain before generating - * next send completion entry not via send DMA - * HFI1_S_WAIT_PIO - waiting for a send buffer to be available - * HFI1_S_WAIT_TX - waiting for a struct verbs_txreq to be available - * HFI1_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available - * HFI1_S_WAIT_KMEM - waiting for kernel memory to be available - * HFI1_S_WAIT_PSN - waiting for a packet to exit the send DMA queue - * HFI1_S_WAIT_ACK - waiting for an ACK packet before sending more requests - * HFI1_S_SEND_ONE - send one packet, request ACK, then wait for ACK - * HFI1_S_ECN - a BECN was queued to the send engine - */ -#define HFI1_S_SIGNAL_REQ_WR 0x0001 -#define HFI1_S_BUSY 0x0002 -#define HFI1_S_TIMER 0x0004 -#define HFI1_S_RESP_PENDING 0x0008 -#define HFI1_S_ACK_PENDING 0x0010 -#define HFI1_S_WAIT_FENCE 0x0020 -#define HFI1_S_WAIT_RDMAR 0x0040 -#define HFI1_S_WAIT_RNR 0x0080 -#define HFI1_S_WAIT_SSN_CREDIT 0x0100 -#define HFI1_S_WAIT_DMA 0x0200 -#define HFI1_S_WAIT_PIO 0x0400 -#define HFI1_S_WAIT_TX 0x0800 -#define HFI1_S_WAIT_DMA_DESC 0x1000 -#define HFI1_S_WAIT_KMEM 0x2000 -#define HFI1_S_WAIT_PSN 0x4000 -#define HFI1_S_WAIT_ACK 0x8000 -#define HFI1_S_SEND_ONE 0x10000 -#define HFI1_S_UNLIMITED_CREDIT 0x20000 -#define HFI1_S_AHG_VALID 0x40000 -#define HFI1_S_AHG_CLEAR 0x80000 -#define HFI1_S_ECN 0x100000 - -/* - * Wait flags that would prevent any packet type from being sent. - */ -#define HFI1_S_ANY_WAIT_IO (HFI1_S_WAIT_PIO | HFI1_S_WAIT_TX | \ - HFI1_S_WAIT_DMA_DESC | HFI1_S_WAIT_KMEM) - -/* - * Wait flags that would prevent send work requests from making progress. - */ -#define HFI1_S_ANY_WAIT_SEND (HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR | \ - HFI1_S_WAIT_RNR | HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_DMA | \ - HFI1_S_WAIT_PSN | HFI1_S_WAIT_ACK) - -#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | HFI1_S_ANY_WAIT_SEND) - #define HFI1_PSN_CREDIT 16 /* @@ -507,9 +430,9 @@ static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait) */ static inline int hfi1_send_ok(struct rvt_qp *qp) { - return !(qp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT_IO)) && - (qp->s_hdrwords || (qp->s_flags & HFI1_S_RESP_PENDING) || - !(qp->s_flags & HFI1_S_ANY_WAIT_SEND)); + return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) && + (qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) || + !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); } /* -- cgit v0.10.2 From 1c4b7d971d6679277844cefc0f5c191c800bf955 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:06 -0800 Subject: staging/rdma/hfi1: Remove qpdev and qpn table from hfi1 Another change on the way to removing queue pair functionality from hfi1. This patch removes the private queue pair structure and the table which holds the queue pair numbers in favor of using what is provided by rdmavt. 
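The QPN allocator itself stays in the driver for now; it just retargets rdmavt's table types, where each rvt_qpn_map covers one page worth of bits (RVT_BITS_PER_PAGE QPNs). The page-index/bit-offset arithmetic is sketched below, condensed from the free_qpn() hunk in this patch; only the function name is invented for the example.

static void qpn_release(struct rvt_qpn_table *qpt, u32 qpn)
{
	/* Which page-sized bitmap holds this QPN... */
	struct rvt_qpn_map *map = qpt->map + qpn / RVT_BITS_PER_PAGE;

	/* ...and which bit within that page marks it in use. */
	if (map->page)
		clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
}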
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index d5620ba..1bf8083 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -60,9 +60,6 @@ #include "trace.h" #include "sdma.h" -#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) - static unsigned int hfi1_qp_table_size = 256; module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); @@ -75,10 +72,10 @@ static int iowait_sleep( unsigned seq); static void iowait_wakeup(struct iowait *wait, int reason); -static inline unsigned mk_qpn(struct hfi1_qpn_table *qpt, - struct qpn_map *map, unsigned off) +static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, + struct rvt_qpn_map *map, unsigned off) { - return (map - qpt->map) * BITS_PER_PAGE + off; + return (map - qpt->map) * RVT_BITS_PER_PAGE + off; } /* @@ -118,7 +115,7 @@ static const u16 credit_table[31] = { 32768 /* 1E */ }; -static void get_map_page(struct hfi1_qpn_table *qpt, struct qpn_map *map) +static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) { unsigned long page = get_zeroed_page(GFP_KERNEL); @@ -138,11 +135,11 @@ static void get_map_page(struct hfi1_qpn_table *qpt, struct qpn_map *map) * Allocate the next available QPN or * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ -static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt, +static int alloc_qpn(struct hfi1_devdata *dd, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port) { u32 i, offset, max_scan, qpn; - struct qpn_map *map; + struct rvt_qpn_map *map; u32 ret; if (type == IB_QPT_SMI || type == IB_QPT_GSI) { @@ -160,11 +157,11 @@ static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt, } qpn = qpt->last + qpt->incr; - if (qpn >= QPN_MAX) + if (qpn >= RVT_QPN_MAX) qpn = qpt->incr | ((qpt->last & 1) ^ 1); /* offset carries bit 0 */ - offset = qpn & BITS_PER_PAGE_MASK; - map = &qpt->map[qpn / BITS_PER_PAGE]; + offset = qpn & RVT_BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { @@ -180,18 +177,19 @@ static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt, } offset += qpt->incr; /* - * This qpn might be bogus if offset >= BITS_PER_PAGE. - * That is OK. It gets re-assigned below + * This qpn might be bogus if offset >= + * RVT_BITS_PER_PAGE. That is OK. It gets re-assigned + * below */ qpn = mk_qpn(qpt, map, offset); - } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); + } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX); /* * In order to keep the number of pages allocated to a * minimum, we scan the all existing pages before increasing * the size of the bitmap table. 
*/ if (++i > max_scan) { - if (qpt->nmaps == QPNMAP_ENTRIES) + if (qpt->nmaps == RVT_QPNMAP_ENTRIES) break; map = &qpt->map[qpt->nmaps++]; /* start at incr with current bit 0 */ @@ -216,13 +214,13 @@ bail: return ret; } -static void free_qpn(struct hfi1_qpn_table *qpt, u32 qpn) +static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { - struct qpn_map *map; + struct rvt_qpn_map *map; - map = qpt->map + qpn / BITS_PER_PAGE; + map = qpt->map + qpn / RVT_BITS_PER_PAGE; if (map->page) - clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); } /* @@ -235,19 +233,19 @@ static void insert_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) unsigned long flags; atomic_inc(&qp->refcount); - spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags); + spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); if (qp->ibqp.qp_num <= 1) { rcu_assign_pointer(ibp->rvp.qp[qp->ibqp.qp_num], qp); } else { - u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num); + u32 n = qpn_hash(dev->rdi.qp_dev, qp->ibqp.qp_num); - qp->next = dev->qp_dev->qp_table[n]; - rcu_assign_pointer(dev->qp_dev->qp_table[n], qp); + qp->next = dev->rdi.qp_dev->qp_table[n]; + rcu_assign_pointer(dev->rdi.qp_dev->qp_table[n], qp); trace_hfi1_qpinsert(qp, n); } - spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags); + spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); } /* @@ -257,40 +255,40 @@ static void insert_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) static void remove_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num); + u32 n = qpn_hash(dev->rdi.qp_dev, qp->ibqp.qp_num); unsigned long flags; int removed = 1; - spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags); + spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); if (rcu_dereference_protected(ibp->rvp.qp[0], lockdep_is_held( - &dev->qp_dev->qpt_lock)) == qp) { + &dev->rdi.qp_dev->qpt_lock)) == qp) { RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); } else if (rcu_dereference_protected(ibp->rvp.qp[1], - lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) { + lockdep_is_held(&dev->rdi.qp_dev->qpt_lock)) == qp) { RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } else { struct rvt_qp *q; struct rvt_qp __rcu **qpp; removed = 0; - qpp = &dev->qp_dev->qp_table[n]; + qpp = &dev->rdi.qp_dev->qp_table[n]; for (; (q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->qp_dev->qpt_lock))) + lockdep_is_held(&dev->rdi.qp_dev->qpt_lock))) != NULL; qpp = &q->next) if (q == qp) { RCU_INIT_POINTER(*qpp, rcu_dereference_protected(qp->next, - lockdep_is_held(&dev->qp_dev->qpt_lock))); + lockdep_is_held(&dev->rdi.qp_dev->qpt_lock))); removed = 1; trace_hfi1_qpremove(qp, n); break; } } - spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags); + spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); if (removed) { synchronize_rcu(); if (atomic_dec_and_test(&qp->refcount)) @@ -311,6 +309,7 @@ static unsigned free_all_qps(struct hfi1_devdata *dd) unsigned long flags; struct rvt_qp *qp; unsigned n, qp_inuse = 0; + spinlock_t *l; /* useless pointer to shutup checkpatch */ for (n = 0; n < dd->num_pports; n++) { struct hfi1_ibport *ibp = &dd->pport[n].ibport_data; @@ -325,19 +324,20 @@ static unsigned free_all_qps(struct hfi1_devdata *dd) rcu_read_unlock(); } - if (!dev->qp_dev) + if (!dev->rdi.qp_dev) goto bail; - spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags); - for (n = 0; n < dev->qp_dev->qp_table_size; n++) { - qp = rcu_dereference_protected(dev->qp_dev->qp_table[n], - 
lockdep_is_held(&dev->qp_dev->qpt_lock)); - RCU_INIT_POINTER(dev->qp_dev->qp_table[n], NULL); + spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); + for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) { + l = &dev->rdi.qp_dev->qpt_lock; + qp = rcu_dereference_protected(dev->rdi.qp_dev->qp_table[n], + lockdep_is_held(l)); + RCU_INIT_POINTER(dev->rdi.qp_dev->qp_table[n], NULL); for (; qp; qp = rcu_dereference_protected(qp->next, - lockdep_is_held(&dev->qp_dev->qpt_lock))) + lockdep_is_held(l))) qp_inuse++; } - spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags); + spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); synchronize_rcu(); bail: return qp_inuse; @@ -1157,7 +1157,8 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, qp->s_flags = RVT_S_SIGNAL_REQ_WR; dev = to_idev(ibpd->device); dd = dd_from_dev(dev); - err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type, + err = alloc_qpn(dd, &dev->rdi.qp_dev->qpn_table, + init_attr->qp_type, init_attr->port_num); if (err < 0) { ret = ERR_PTR(err); @@ -1259,7 +1260,7 @@ bail_ip: kref_put(&qp->ip->ref, rvt_release_mmap_info); else vfree(qp->r_rq.wq); - free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num); + free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); bail_qp: kfree(priv->s_hdr); kfree(priv); @@ -1310,7 +1311,7 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) spin_unlock_irq(&qp->r_lock); /* all user's cleaned up, mark it available */ - free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num); + free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); spin_lock(&dev->n_qps_lock); dev->n_qps_allocated--; spin_unlock(&dev->n_qps_lock); @@ -1330,10 +1331,10 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) * init_qpn_table - initialize the QP number table for a device * @qpt: the QPN table */ -static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt) +static int init_qpn_table(struct hfi1_devdata *dd, struct rvt_qpn_table *qpt) { u32 offset, qpn, i; - struct qpn_map *map; + struct rvt_qpn_map *map; int ret = 0; spin_lock_init(&qpt->lock); @@ -1343,9 +1344,9 @@ static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt) /* insure we don't assign QPs from KDETH 64K window */ qpn = kdeth_qp << 16; - qpt->nmaps = qpn / BITS_PER_PAGE; + qpt->nmaps = qpn / RVT_BITS_PER_PAGE; /* This should always be zero */ - offset = qpn & BITS_PER_PAGE_MASK; + offset = qpn & RVT_BITS_PER_PAGE_MASK; map = &qpt->map[qpt->nmaps]; dd_dev_info(dd, "Reserving QPNs for KDETH window from 0x%x to 0x%x\n", qpn, qpn + 65535); @@ -1359,7 +1360,7 @@ static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt) } set_bit(offset, map->page); offset++; - if (offset == BITS_PER_PAGE) { + if (offset == RVT_BITS_PER_PAGE) { /* next page */ qpt->nmaps++; map++; @@ -1373,7 +1374,7 @@ static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt) * free_qpn_table - free the QP number table for a device * @qpt: the QPN table */ -static void free_qpn_table(struct hfi1_qpn_table *qpt) +static void free_qpn_table(struct rvt_qpn_table *qpt) { int i; @@ -1505,31 +1506,31 @@ int hfi1_qp_init(struct hfi1_ibdev *dev) int ret = -ENOMEM; /* allocate parent object */ - dev->qp_dev = kzalloc(sizeof(*dev->qp_dev), GFP_KERNEL); - if (!dev->qp_dev) + dev->rdi.qp_dev = kzalloc(sizeof(*dev->rdi.qp_dev), GFP_KERNEL); + if (!dev->rdi.qp_dev) goto nomem; /* allocate hash table */ - dev->qp_dev->qp_table_size = hfi1_qp_table_size; - dev->qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size); - dev->qp_dev->qp_table = - 
kmalloc(dev->qp_dev->qp_table_size * - sizeof(*dev->qp_dev->qp_table), + dev->rdi.qp_dev->qp_table_size = hfi1_qp_table_size; + dev->rdi.qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size); + dev->rdi.qp_dev->qp_table = + kmalloc(dev->rdi.qp_dev->qp_table_size * + sizeof(*dev->rdi.qp_dev->qp_table), GFP_KERNEL); - if (!dev->qp_dev->qp_table) + if (!dev->rdi.qp_dev->qp_table) goto nomem; - for (i = 0; i < dev->qp_dev->qp_table_size; i++) - RCU_INIT_POINTER(dev->qp_dev->qp_table[i], NULL); - spin_lock_init(&dev->qp_dev->qpt_lock); + for (i = 0; i < dev->rdi.qp_dev->qp_table_size; i++) + RCU_INIT_POINTER(dev->rdi.qp_dev->qp_table[i], NULL); + spin_lock_init(&dev->rdi.qp_dev->qpt_lock); /* initialize qpn map */ - ret = init_qpn_table(dd, &dev->qp_dev->qpn_table); + ret = init_qpn_table(dd, &dev->rdi.qp_dev->qpn_table); if (ret) goto nomem; return ret; nomem: - if (dev->qp_dev) { - kfree(dev->qp_dev->qp_table); - free_qpn_table(&dev->qp_dev->qpn_table); - kfree(dev->qp_dev); + if (dev->rdi.qp_dev) { + kfree(dev->rdi.qp_dev->qp_table); + free_qpn_table(&dev->rdi.qp_dev->qpn_table); + kfree(dev->rdi.qp_dev); } return ret; } @@ -1543,10 +1544,10 @@ void hfi1_qp_exit(struct hfi1_ibdev *dev) if (qps_inuse) dd_dev_err(dd, "QP memory leak! %u still in use\n", qps_inuse); - if (dev->qp_dev) { - kfree(dev->qp_dev->qp_table); - free_qpn_table(&dev->qp_dev->qpn_table); - kfree(dev->qp_dev); + if (dev->rdi.qp_dev) { + kfree(dev->rdi.qp_dev->qp_table); + free_qpn_table(&dev->rdi.qp_dev->qpn_table); + kfree(dev->rdi.qp_dev); } } @@ -1619,11 +1620,11 @@ int qp_iter_next(struct qp_iter *iter) * * n = 0..iter->specials is the special qp indices * - * n = iter->specials..dev->qp_dev->qp_table_size+iter->specials are + * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are * the potential hash bucket entries * */ - for (; n < dev->qp_dev->qp_table_size + iter->specials; n++) { + for (; n < dev->rdi.qp_dev->qp_table_size + iter->specials; n++) { if (pqp) { qp = rcu_dereference(pqp->next); } else { @@ -1642,7 +1643,7 @@ int qp_iter_next(struct qp_iter *iter) qp = rcu_dereference(ibp->rvp.qp[1]); } else { qp = rcu_dereference( - dev->qp_dev->qp_table[ + dev->rdi.qp_dev->qp_table[ (n - iter->specials)]); } } diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 9efa4bc..18b0f0e 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -51,41 +51,11 @@ */ #include +#include #include "verbs.h" #include "sdma.h" -#define QPN_MAX BIT(24) -#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) - -/* - * QPN-map pages start out as NULL, they get allocated upon - * first use and are never deallocated. This way, - * large bitmaps are not allocated unless large numbers of QPs are used. 
- */ -struct qpn_map { - void *page; -}; - -struct hfi1_qpn_table { - spinlock_t lock; /* protect changes in this struct */ - unsigned flags; /* flags for QP0/1 allocated for each port */ - u32 last; /* last QP number allocated */ - u32 nmaps; /* size of the map table */ - u16 limit; - u8 incr; - /* bit map of free QP numbers other than 0/1 */ - struct qpn_map map[QPNMAP_ENTRIES]; -}; - -struct hfi1_qp_ibdev { - u32 qp_table_size; - u32 qp_table_bits; - struct rvt_qp __rcu **qp_table; - spinlock_t qpt_lock; - struct hfi1_qpn_table qpn_table; -}; - -static inline u32 qpn_hash(struct hfi1_qp_ibdev *dev, u32 qpn) +static inline u32 qpn_hash(struct rvt_qp_ibdev *dev, u32 qpn) { return hash_32(qpn, dev->qp_table_bits); } @@ -107,9 +77,9 @@ static inline struct rvt_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, qp = rcu_dereference(ibp->rvp.qp[qpn]); } else { struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; - u32 n = qpn_hash(dev->qp_dev, qpn); + u32 n = qpn_hash(dev->rdi.qp_dev, qpn); - for (qp = rcu_dereference(dev->qp_dev->qp_table[n]); qp; + for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; qp = rcu_dereference(qp->next)) if (qp->ibqp.qp_num == qpn) break; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index b9843a5..c22f0d1 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -346,12 +346,9 @@ struct hfi1_ibport { u8 sc_to_sl[32]; }; -struct hfi1_qp_ibdev; struct hfi1_ibdev { struct rvt_dev_info rdi; /* Must be first */ - struct hfi1_qp_ibdev *qp_dev; - /* QP numbers are shared by all IB ports */ /* protect wait lists */ seqlock_t iowait_lock; -- cgit v0.10.2 From a2c2d608957c1b6f444e092fa7f49c1f1ac7fa0a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:12 -0800 Subject: staging/rdma/hfi1: Remove create_qp functionality Rely on rdmavt to provide queue pair creation. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 1bf8083..a336d2a 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -60,7 +60,7 @@ #include "trace.h" #include "sdma.h" -static unsigned int hfi1_qp_table_size = 256; +unsigned int hfi1_qp_table_size = 256; module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); @@ -115,105 +115,6 @@ static const u16 credit_table[31] = { 32768 /* 1E */ }; -static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) -{ - unsigned long page = get_zeroed_page(GFP_KERNEL); - - /* - * Free the page if someone raced with us installing it. - */ - - spin_lock(&qpt->lock); - if (map->page) - free_page(page); - else - map->page = (void *)page; - spin_unlock(&qpt->lock); -} - -/* - * Allocate the next available QPN or - * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 
- */ -static int alloc_qpn(struct hfi1_devdata *dd, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port) -{ - u32 i, offset, max_scan, qpn; - struct rvt_qpn_map *map; - u32 ret; - - if (type == IB_QPT_SMI || type == IB_QPT_GSI) { - unsigned n; - - ret = type == IB_QPT_GSI; - n = 1 << (ret + 2 * (port - 1)); - spin_lock(&qpt->lock); - if (qpt->flags & n) - ret = -EINVAL; - else - qpt->flags |= n; - spin_unlock(&qpt->lock); - goto bail; - } - - qpn = qpt->last + qpt->incr; - if (qpn >= RVT_QPN_MAX) - qpn = qpt->incr | ((qpt->last & 1) ^ 1); - /* offset carries bit 0 */ - offset = qpn & RVT_BITS_PER_PAGE_MASK; - map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; - max_scan = qpt->nmaps - !offset; - for (i = 0;;) { - if (unlikely(!map->page)) { - get_map_page(qpt, map); - if (unlikely(!map->page)) - break; - } - do { - if (!test_and_set_bit(offset, map->page)) { - qpt->last = qpn; - ret = qpn; - goto bail; - } - offset += qpt->incr; - /* - * This qpn might be bogus if offset >= - * RVT_BITS_PER_PAGE. That is OK. It gets re-assigned - * below - */ - qpn = mk_qpn(qpt, map, offset); - } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX); - /* - * In order to keep the number of pages allocated to a - * minimum, we scan the all existing pages before increasing - * the size of the bitmap table. - */ - if (++i > max_scan) { - if (qpt->nmaps == RVT_QPNMAP_ENTRIES) - break; - map = &qpt->map[qpt->nmaps++]; - /* start at incr with current bit 0 */ - offset = qpt->incr | (offset & 1); - } else if (map < &qpt->map[qpt->nmaps]) { - ++map; - /* start at incr with current bit 0 */ - offset = qpt->incr | (offset & 1); - } else { - map = &qpt->map[0]; - /* wrap to first map page, invert bit 0 */ - offset = qpt->incr | ((offset & 1) ^ 1); - } - /* there can be no bits at shift and below */ - WARN_ON(offset & (dd->qos_shift - 1)); - qpn = mk_qpn(qpt, map, offset); - } - - ret = -ENOMEM; - -bail: - return ret; -} - static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; @@ -296,113 +197,6 @@ static void remove_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) } } -/** - * free_all_qps - check for QPs still in use - * @qpt: the QP table to empty - * - * There should not be any QPs still in use. - * Free memory for table. 
- */ -static unsigned free_all_qps(struct hfi1_devdata *dd) -{ - struct hfi1_ibdev *dev = &dd->verbs_dev; - unsigned long flags; - struct rvt_qp *qp; - unsigned n, qp_inuse = 0; - spinlock_t *l; /* useless pointer to shutup checkpatch */ - - for (n = 0; n < dd->num_pports; n++) { - struct hfi1_ibport *ibp = &dd->pport[n].ibport_data; - - if (!hfi1_mcast_tree_empty(ibp)) - qp_inuse++; - rcu_read_lock(); - if (rcu_dereference(ibp->rvp.qp[0])) - qp_inuse++; - if (rcu_dereference(ibp->rvp.qp[1])) - qp_inuse++; - rcu_read_unlock(); - } - - if (!dev->rdi.qp_dev) - goto bail; - spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); - for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) { - l = &dev->rdi.qp_dev->qpt_lock; - qp = rcu_dereference_protected(dev->rdi.qp_dev->qp_table[n], - lockdep_is_held(l)); - RCU_INIT_POINTER(dev->rdi.qp_dev->qp_table[n], NULL); - - for (; qp; qp = rcu_dereference_protected(qp->next, - lockdep_is_held(l))) - qp_inuse++; - } - spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); - synchronize_rcu(); -bail: - return qp_inuse; -} - -/** - * reset_qp - initialize the QP state to the reset state - * @qp: the QP to reset - * @type: the QP type - */ -static void reset_qp(struct rvt_qp *qp, enum ib_qp_type type) -{ - struct hfi1_qp_priv *priv = qp->priv; - qp->remote_qpn = 0; - qp->qkey = 0; - qp->qp_access_flags = 0; - iowait_init( - &priv->s_iowait, - 1, - hfi1_do_send, - iowait_sleep, - iowait_wakeup); - qp->s_flags &= RVT_S_SIGNAL_REQ_WR; - qp->s_hdrwords = 0; - qp->s_wqe = NULL; - qp->s_draining = 0; - qp->s_next_psn = 0; - qp->s_last_psn = 0; - qp->s_sending_psn = 0; - qp->s_sending_hpsn = 0; - qp->s_psn = 0; - qp->r_psn = 0; - qp->r_msn = 0; - if (type == IB_QPT_RC) { - qp->s_state = IB_OPCODE_RC_SEND_LAST; - qp->r_state = IB_OPCODE_RC_SEND_LAST; - } else { - qp->s_state = IB_OPCODE_UC_SEND_LAST; - qp->r_state = IB_OPCODE_UC_SEND_LAST; - } - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - qp->r_nak_state = 0; - priv->r_adefered = 0; - qp->r_aflags = 0; - qp->r_flags = 0; - qp->s_head = 0; - qp->s_tail = 0; - qp->s_cur = 0; - qp->s_acked = 0; - qp->s_last = 0; - qp->s_ssn = 1; - qp->s_lsn = 0; - clear_ahg(qp); - qp->s_mig_state = IB_MIG_MIGRATED; - memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); - qp->r_head_ack_queue = 0; - qp->s_tail_ack_queue = 0; - qp->s_num_rd_atomic = 0; - if (qp->r_rq.wq) { - qp->r_rq.wq->head = 0; - qp->r_rq.wq->tail = 0; - } - qp->r_sge.num_sge = 0; -} - static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; @@ -756,7 +550,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_lock(&qp->s_lock); clear_mr_refs(qp, 1); clear_ahg(qp); - reset_qp(qp, ibqp->qp_type); + rvt_reset_qp(&dev->rdi, qp, ibqp->qp_type); } break; @@ -1025,254 +819,6 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) } /** - * hfi1_create_qp - create a queue pair for a device - * @ibpd: the protection domain who's device we create the queue pair for - * @init_attr: the attributes of the queue pair - * @udata: user data for libibverbs.so - * - * Returns the queue pair on success, otherwise returns an errno. - * - * Called by the ib_create_qp() core verbs function. 
- */ -struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) -{ - struct rvt_qp *qp; - struct hfi1_qp_priv *priv; - int err; - struct rvt_swqe *swq = NULL; - struct hfi1_ibdev *dev; - struct hfi1_devdata *dd; - size_t sz; - size_t sg_list_sz; - struct ib_qp *ret; - - if (init_attr->cap.max_send_sge > hfi1_max_sges || - init_attr->cap.max_send_wr > hfi1_max_qp_wrs || - init_attr->create_flags) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - /* Check receive queue parameters if no SRQ is specified. */ - if (!init_attr->srq) { - if (init_attr->cap.max_recv_sge > hfi1_max_sges || - init_attr->cap.max_recv_wr > hfi1_max_qp_wrs) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - if (init_attr->cap.max_send_sge + - init_attr->cap.max_send_wr + - init_attr->cap.max_recv_sge + - init_attr->cap.max_recv_wr == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - } - - switch (init_attr->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - if (init_attr->port_num == 0 || - init_attr->port_num > ibpd->device->phys_port_cnt) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - case IB_QPT_UC: - case IB_QPT_RC: - case IB_QPT_UD: - sz = sizeof(struct rvt_sge) * - init_attr->cap.max_send_sge + - sizeof(struct rvt_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); - if (swq == NULL) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - sz = sizeof(*qp); - sg_list_sz = 0; - if (init_attr->srq) { - struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); - - if (srq->rq.max_sge > 1) - sg_list_sz = sizeof(*qp->r_sg_list) * - (srq->rq.max_sge - 1); - } else if (init_attr->cap.max_recv_sge > 1) - sg_list_sz = sizeof(*qp->r_sg_list) * - (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); - if (!qp) { - ret = ERR_PTR(-ENOMEM); - goto bail_swq; - } - RCU_INIT_POINTER(qp->next, NULL); - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - ret = ERR_PTR(-ENOMEM); - goto bail_qp_priv; - } - priv->owner = qp; - priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), GFP_KERNEL); - if (!priv->s_hdr) { - ret = ERR_PTR(-ENOMEM); - goto bail_qp; - } - qp->priv = priv; - qp->timeout_jiffies = - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL); - if (init_attr->srq) - sz = 0; - else { - qp->r_rq.size = init_attr->cap.max_recv_wr + 1; - qp->r_rq.max_sge = init_attr->cap.max_recv_sge; - sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + - sizeof(struct rvt_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); - if (!qp->r_rq.wq) { - ret = ERR_PTR(-ENOMEM); - goto bail_qp; - } - } - - /* - * ib_create_qp() will initialize qp->ibqp - * except for qp->ibqp.qp_num. 
- */ - spin_lock_init(&qp->r_lock); - spin_lock_init(&qp->s_lock); - spin_lock_init(&qp->r_rq.lock); - atomic_set(&qp->refcount, 0); - init_waitqueue_head(&qp->wait); - init_timer(&qp->s_timer); - qp->s_timer.data = (unsigned long)qp; - INIT_LIST_HEAD(&qp->rspwait); - qp->state = IB_QPS_RESET; - qp->s_wq = swq; - qp->s_size = init_attr->cap.max_send_wr + 1; - qp->s_max_sge = init_attr->cap.max_send_sge; - if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = RVT_S_SIGNAL_REQ_WR; - dev = to_idev(ibpd->device); - dd = dd_from_dev(dev); - err = alloc_qpn(dd, &dev->rdi.qp_dev->qpn_table, - init_attr->qp_type, - init_attr->port_num); - if (err < 0) { - ret = ERR_PTR(err); - vfree(qp->r_rq.wq); - goto bail_qp; - } - qp->ibqp.qp_num = err; - qp->port_num = init_attr->port_num; - reset_qp(qp, init_attr->qp_type); - - break; - - default: - /* Don't support raw QPs */ - ret = ERR_PTR(-ENOSYS); - goto bail; - } - - init_attr->cap.max_inline_data = 0; - - /* - * Return the address of the RWQ as the offset to mmap. - * See hfi1_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - if (!qp->r_rq.wq) { - __u64 offset = 0; - - err = ib_copy_to_udata(udata, &offset, - sizeof(offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else { - u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; - - qp->ip = rvt_create_mmap_info(&dev->rdi, s, - ibpd->uobject->context, - qp->r_rq.wq); - if (!qp->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - err = ib_copy_to_udata(udata, &(qp->ip->offset), - sizeof(qp->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } - } - - spin_lock(&dev->n_qps_lock); - if (dev->n_qps_allocated == hfi1_max_qps) { - spin_unlock(&dev->n_qps_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_qps_allocated++; - spin_unlock(&dev->n_qps_lock); - - if (qp->ip) { - spin_lock_irq(&dev->rdi.pending_lock); - list_add(&qp->ip->pending_mmaps, &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - - ret = &qp->ibqp; - - /* - * We have our QP and its good, now keep track of what types of opcodes - * can be processed on this QP. We do this by keeping track of what the - * 3 high order bits of the opcode are. 
- */ - switch (init_attr->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - case IB_QPT_UD: - qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & OPCODE_QP_MASK; - break; - case IB_QPT_RC: - qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & OPCODE_QP_MASK; - break; - case IB_QPT_UC: - qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & OPCODE_QP_MASK; - break; - default: - ret = ERR_PTR(-EINVAL); - goto bail_ip; - } - - goto bail; - -bail_ip: - if (qp->ip) - kref_put(&qp->ip->ref, rvt_release_mmap_info); - else - vfree(qp->r_rq.wq); - free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); -bail_qp: - kfree(priv->s_hdr); - kfree(priv); -bail_qp_priv: - kfree(qp); -bail_swq: - vfree(swq); -bail: - return ret; -} - -/** * hfi1_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy * @@ -1328,61 +874,6 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) } /** - * init_qpn_table - initialize the QP number table for a device - * @qpt: the QPN table - */ -static int init_qpn_table(struct hfi1_devdata *dd, struct rvt_qpn_table *qpt) -{ - u32 offset, qpn, i; - struct rvt_qpn_map *map; - int ret = 0; - - spin_lock_init(&qpt->lock); - - qpt->last = 0; - qpt->incr = 1 << dd->qos_shift; - - /* insure we don't assign QPs from KDETH 64K window */ - qpn = kdeth_qp << 16; - qpt->nmaps = qpn / RVT_BITS_PER_PAGE; - /* This should always be zero */ - offset = qpn & RVT_BITS_PER_PAGE_MASK; - map = &qpt->map[qpt->nmaps]; - dd_dev_info(dd, "Reserving QPNs for KDETH window from 0x%x to 0x%x\n", - qpn, qpn + 65535); - for (i = 0; i < 65536; i++) { - if (!map->page) { - get_map_page(qpt, map); - if (!map->page) { - ret = -ENOMEM; - break; - } - } - set_bit(offset, map->page); - offset++; - if (offset == RVT_BITS_PER_PAGE) { - /* next page */ - qpt->nmaps++; - map++; - offset = 0; - } - } - return ret; -} - -/** - * free_qpn_table - free the QP number table for a device - * @qpt: the QPN table - */ -static void free_qpn_table(struct rvt_qpn_table *qpt) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(qpt->map); i++) - free_page((unsigned long) qpt->map[i].page); -} - -/** * hfi1_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush * @aeth: the Acknowledge Extended Transport Header @@ -1499,58 +990,6 @@ static void iowait_wakeup(struct iowait *wait, int reason) hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC); } -int hfi1_qp_init(struct hfi1_ibdev *dev) -{ - struct hfi1_devdata *dd = dd_from_dev(dev); - int i; - int ret = -ENOMEM; - - /* allocate parent object */ - dev->rdi.qp_dev = kzalloc(sizeof(*dev->rdi.qp_dev), GFP_KERNEL); - if (!dev->rdi.qp_dev) - goto nomem; - /* allocate hash table */ - dev->rdi.qp_dev->qp_table_size = hfi1_qp_table_size; - dev->rdi.qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size); - dev->rdi.qp_dev->qp_table = - kmalloc(dev->rdi.qp_dev->qp_table_size * - sizeof(*dev->rdi.qp_dev->qp_table), - GFP_KERNEL); - if (!dev->rdi.qp_dev->qp_table) - goto nomem; - for (i = 0; i < dev->rdi.qp_dev->qp_table_size; i++) - RCU_INIT_POINTER(dev->rdi.qp_dev->qp_table[i], NULL); - spin_lock_init(&dev->rdi.qp_dev->qpt_lock); - /* initialize qpn map */ - ret = init_qpn_table(dd, &dev->rdi.qp_dev->qpn_table); - if (ret) - goto nomem; - return ret; -nomem: - if (dev->rdi.qp_dev) { - kfree(dev->rdi.qp_dev->qp_table); - free_qpn_table(&dev->rdi.qp_dev->qpn_table); - kfree(dev->rdi.qp_dev); - } - return ret; -} - -void hfi1_qp_exit(struct hfi1_ibdev *dev) -{ - struct hfi1_devdata *dd = dd_from_dev(dev); - u32 qps_inuse; - - qps_inuse = free_all_qps(dd); - if (qps_inuse) - dd_dev_err(dd, "QP memory leak! 
%u still in use\n", - qps_inuse); - if (dev->rdi.qp_dev) { - kfree(dev->rdi.qp_dev->qp_table); - free_qpn_table(&dev->rdi.qp_dev->qpn_table); - kfree(dev->rdi.qp_dev); - } -} - /** * * qp_to_sdma_engine - map a qp to a send engine @@ -1724,6 +1163,75 @@ void qp_comm_est(struct rvt_qp *qp) } } +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, + gfp_t gfp) +{ + struct hfi1_qp_priv *priv; + + priv = kzalloc(sizeof(*priv), gfp); + if (!priv) + return ERR_PTR(-ENOMEM); + + priv->owner = qp; + + priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp); + if (!priv->s_hdr) { + kfree(priv); + return ERR_PTR(-ENOMEM); + } + + return priv; +} + +void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + kfree(priv->s_hdr); + kfree(priv); +} + +unsigned free_all_qps(struct rvt_dev_info *rdi) +{ + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + int n; + unsigned qp_inuse = 0; + + for (n = 0; n < dd->num_pports; n++) { + struct hfi1_ibport *ibp = &dd->pport[n].ibport_data; + + if (!hfi1_mcast_tree_empty(ibp)) + qp_inuse++; + rcu_read_lock(); + if (rcu_dereference(ibp->rvp.qp[0])) + qp_inuse++; + if (rcu_dereference(ibp->rvp.qp[1])) + qp_inuse++; + rcu_read_unlock(); + } + + return qp_inuse; +} + +void notify_qp_reset(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + iowait_init( + &priv->s_iowait, + 1, + hfi1_do_send, + iowait_sleep, + iowait_wakeup); + priv->r_adefered = 0; + clear_ahg(qp); +} + /* * Switch to alternate path. * The QP s_lock should be held and interrupts disabled. diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 18b0f0e..b825cb3 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -55,6 +55,8 @@ #include "verbs.h" #include "sdma.h" +extern unsigned int hfi1_qp_table_size; + static inline u32 qpn_hash(struct rvt_qp_ibdev *dev, u32 qpn) { return hash_32(qpn, dev->qp_table_bits); @@ -170,18 +172,6 @@ int hfi1_destroy_qp(struct ib_qp *ibqp); void hfi1_get_credit(struct rvt_qp *qp, u32 aeth); /** - * hfi1_qp_init - allocate QP tables - * @dev: a pointer to the hfi1_ibdev - */ -int hfi1_qp_init(struct hfi1_ibdev *dev); - -/** - * hfi1_qp_exit - free the QP related structures - * @dev: a pointer to the hfi1_ibdev - */ -void hfi1_qp_exit(struct hfi1_ibdev *dev); - -/** * hfi1_qp_wakeup - wake up on the indicated event * @qp: the QP * @flag: flag the qp on which the qp is stalled @@ -255,4 +245,13 @@ static inline void hfi1_schedule_send(struct rvt_qp *qp) void hfi1_migrate_qp(struct rvt_qp *qp); +/* + * Functions provided by hfi1 driver for rdmavt to use + */ +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, + gfp_t gfp); +void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); +unsigned free_all_qps(struct rvt_dev_info *rdi); +void notify_qp_reset(struct rvt_qp *qp); + #endif /* _QP_H */ diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index a1e9f0b..3f02d0a 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1720,11 +1720,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) u16 descq_cnt; char buf[TXREQ_NAME_LEN]; - ret = hfi1_qp_init(dev); - if (ret) - goto err_qp_init; - - for (i = 0; i < dd->num_pports; i++) init_ibport(ppd + i); @@ -1820,7 +1815,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_srq = 
hfi1_modify_srq; ibdev->query_srq = hfi1_query_srq; ibdev->destroy_srq = hfi1_destroy_srq; - ibdev->create_qp = hfi1_create_qp; + ibdev->create_qp = NULL; ibdev->modify_qp = hfi1_modify_qp; ibdev->query_qp = hfi1_query_qp; ibdev->destroy_qp = hfi1_destroy_qp; @@ -1861,8 +1856,25 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; dd->verbs_dev.rdi.dparms.props.max_ah = hfi1_max_ahs; dd->verbs_dev.rdi.dparms.props.max_pd = hfi1_max_pds; - dd->verbs_dev.rdi.flags = (RVT_FLAG_QP_INIT_DRIVER | - RVT_FLAG_CQ_INIT_DRIVER); + dd->verbs_dev.rdi.dparms.props.max_sge = hfi1_max_sges; + + /* queue pair */ + dd->verbs_dev.rdi.dparms.props.max_qp = hfi1_max_qps; + dd->verbs_dev.rdi.dparms.props.max_qp_wr = hfi1_max_qp_wrs; + dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size; + dd->verbs_dev.rdi.dparms.qpn_start = 0; + dd->verbs_dev.rdi.dparms.qpn_inc = 1; + dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; + dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; + dd->verbs_dev.rdi.dparms.qpn_res_end = + dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; + dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; + dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; + dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; + dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; + + /* misc settings */ + dd->verbs_dev.rdi.flags = RVT_FLAG_CQ_INIT_DRIVER; dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); @@ -1895,8 +1907,6 @@ err_agents: err_reg: err_verbs_txreq: kmem_cache_destroy(dev->verbs_txreq_cache); - hfi1_qp_exit(dev); -err_qp_init: dd_dev_err(dd, "cannot register verbs: %d!\n", -ret); bail: return ret; @@ -1917,7 +1927,6 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) if (!list_empty(&dev->memwait)) dd_dev_err(dd, "memwait list not empty!\n"); - hfi1_qp_exit(dev); del_timer_sync(&dev->mem_timer); kmem_cache_destroy(dev->verbs_txreq_cache); } -- cgit v0.10.2 From 94d5171cf2d10174e0ee9c3df463607cb0f4dd53 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Tue, 19 Jan 2016 14:43:17 -0800 Subject: staging/rdma/hfi1: Remove query_device function Removed hfi1 query_device function to use rdmavt rvt_query_device function The rvt dev info device attributes still need to be filled in by the driver Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 3f02d0a..70af487 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1362,55 +1362,49 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) return ret; } -static int query_device(struct ib_device *ibdev, - struct ib_device_attr *props, - struct ib_udata *uhw) +/** + * hfi1_fill_device_attr - Fill in rvt dev info device attributes. 
+ * @dd: the device data structure + */ +static void hfi1_fill_device_attr(struct hfi1_devdata *dd) { - struct hfi1_devdata *dd = dd_from_ibdev(ibdev); - struct hfi1_ibdev *dev = to_idev(ibdev); - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - memset(props, 0, sizeof(*props)); - - props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | - IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | - IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; - - props->page_size_cap = PAGE_SIZE; - props->vendor_id = - dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; - props->vendor_part_id = dd->pcidev->device; - props->hw_ver = dd->minrev; - props->sys_image_guid = ib_hfi1_sys_image_guid; - props->max_mr_size = ~0ULL; - props->max_qp = hfi1_max_qps; - props->max_qp_wr = hfi1_max_qp_wrs; - props->max_sge = hfi1_max_sges; - props->max_sge_rd = hfi1_max_sges; - props->max_cq = hfi1_max_cqs; - props->max_ah = hfi1_max_ahs; - props->max_cqe = hfi1_max_cqes; - props->max_mr = dev->rdi.lkey_table.max; - props->max_fmr = dev->rdi.lkey_table.max; - props->max_map_per_fmr = 32767; - props->max_pd = dev->rdi.dparms.props.max_pd; - props->max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; - props->max_qp_init_rd_atom = 255; - /* props->max_res_rd_atom */ - props->max_srq = hfi1_max_srqs; - props->max_srq_wr = hfi1_max_srq_wrs; - props->max_srq_sge = hfi1_max_srq_sges; - /* props->local_ca_ack_delay */ - props->atomic_cap = IB_ATOMIC_GLOB; - props->max_pkeys = hfi1_get_npkeys(dd); - props->max_mcast_grp = hfi1_max_mcast_grps; - props->max_mcast_qp_attach = hfi1_max_mcast_qp_attached; - props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * - props->max_mcast_grp; - - return 0; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + + memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); + + rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | + IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; + rdi->dparms.props.page_size_cap = PAGE_SIZE; + rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; + rdi->dparms.props.vendor_part_id = dd->pcidev->device; + rdi->dparms.props.hw_ver = dd->minrev; + rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid; + rdi->dparms.props.max_mr_size = ~0ULL; + rdi->dparms.props.max_qp = hfi1_max_qps; + rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; + rdi->dparms.props.max_sge = hfi1_max_sges; + rdi->dparms.props.max_sge_rd = hfi1_max_sges; + rdi->dparms.props.max_cq = hfi1_max_cqs; + rdi->dparms.props.max_ah = hfi1_max_ahs; + rdi->dparms.props.max_cqe = hfi1_max_cqes; + rdi->dparms.props.max_mr = rdi->lkey_table.max; + rdi->dparms.props.max_fmr = rdi->lkey_table.max; + rdi->dparms.props.max_map_per_fmr = 32767; + rdi->dparms.props.max_pd = hfi1_max_pds; + rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; + rdi->dparms.props.max_qp_init_rd_atom = 255; + rdi->dparms.props.max_srq = hfi1_max_srqs; + rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs; + rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges; + rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB; + rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd); + rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps; + rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached; + rdi->dparms.props.max_total_mcast_qp_attach = + rdi->dparms.props.max_mcast_qp_attach * + rdi->dparms.props.max_mcast_grp; } static inline u16 opa_speed_to_ib(u16 in) @@ 
-1797,7 +1791,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->phys_port_cnt = dd->num_pports; ibdev->num_comp_vectors = 1; ibdev->dma_device = &dd->pcidev->dev; - ibdev->query_device = query_device; + ibdev->query_device = NULL; ibdev->modify_device = modify_device; ibdev->query_port = query_port; ibdev->modify_port = modify_port; @@ -1854,13 +1848,12 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; - dd->verbs_dev.rdi.dparms.props.max_ah = hfi1_max_ahs; - dd->verbs_dev.rdi.dparms.props.max_pd = hfi1_max_pds; - dd->verbs_dev.rdi.dparms.props.max_sge = hfi1_max_sges; + /* + * Fill in rvt info device attributes. + */ + hfi1_fill_device_attr(dd); /* queue pair */ - dd->verbs_dev.rdi.dparms.props.max_qp = hfi1_max_qps; - dd->verbs_dev.rdi.dparms.props.max_qp_wr = hfi1_max_qp_wrs; dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size; dd->verbs_dev.rdi.dparms.qpn_start = 0; dd->verbs_dev.rdi.dparms.qpn_inc = 1; -- cgit v0.10.2 From abd712daeeb4461aee5ca5a2bfe2717dc22577ea Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:22 -0800 Subject: staging/rdma/hfi1: Remove CQ data structures and functions from hfi1 The completion queue is not a complex data structure and it can be removed at the same time as its functions. Unlike the more complicated queue pair which was done in multiple patches. This single patch removes all traces of hfi1 specific completeion queues from the hfi1 driver. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 55077f3..7797f2c 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -7,7 +7,7 @@ # obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o -hfi1-y := chip.o cq.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ +hfi1-y := chip.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ init.o intr.o mad.o pcie.o pio.o pio_copy.o \ qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs_mcast.o verbs.o diff --git a/drivers/staging/rdma/hfi1/cq.c b/drivers/staging/rdma/hfi1/cq.c deleted file mode 100644 index 25d1a2a..0000000 --- a/drivers/staging/rdma/hfi1/cq.c +++ /dev/null @@ -1,558 +0,0 @@ -/* - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Corporation. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include -#include -#include -#include - -#include "verbs.h" -#include "hfi.h" - -/** - * hfi1_cq_enter - add a new entry to the completion queue - * @cq: completion queue - * @entry: work completion entry to add - * @sig: true if @entry is a solicited entry - * - * This may be called with qp->s_lock held. - */ -void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited) -{ - struct hfi1_cq_wc *wc; - unsigned long flags; - u32 head; - u32 next; - - spin_lock_irqsave(&cq->lock, flags); - - /* - * Note that the head pointer might be writable by user processes. - * Take care to verify it is a sane value. - */ - wc = cq->queue; - head = wc->head; - if (head >= (unsigned) cq->ibcq.cqe) { - head = cq->ibcq.cqe; - next = 0; - } else - next = head + 1; - if (unlikely(next == wc->tail)) { - spin_unlock_irqrestore(&cq->lock, flags); - if (cq->ibcq.event_handler) { - struct ib_event ev; - - ev.device = cq->ibcq.device; - ev.element.cq = &cq->ibcq; - ev.event = IB_EVENT_CQ_ERR; - cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); - } - return; - } - if (cq->ip) { - wc->uqueue[head].wr_id = entry->wr_id; - wc->uqueue[head].status = entry->status; - wc->uqueue[head].opcode = entry->opcode; - wc->uqueue[head].vendor_err = entry->vendor_err; - wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].ex.imm_data = - (__u32 __force)entry->ex.imm_data; - wc->uqueue[head].qp_num = entry->qp->qp_num; - wc->uqueue[head].src_qp = entry->src_qp; - wc->uqueue[head].wc_flags = entry->wc_flags; - wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = entry->slid; - wc->uqueue[head].sl = entry->sl; - wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; - wc->uqueue[head].port_num = entry->port_num; - /* Make sure entry is written before the head index. 
*/ - smp_wmb(); - } else - wc->kqueue[head] = *entry; - wc->head = next; - - if (cq->notify == IB_CQ_NEXT_COMP || - (cq->notify == IB_CQ_SOLICITED && - (solicited || entry->status != IB_WC_SUCCESS))) { - struct kthread_worker *worker; - /* - * This will cause send_complete() to be called in - * another thread. - */ - smp_read_barrier_depends(); /* see hfi1_cq_exit */ - worker = cq->dd->worker; - if (likely(worker)) { - cq->notify = IB_CQ_NONE; - cq->triggered++; - queue_kthread_work(worker, &cq->comptask); - } - } - - spin_unlock_irqrestore(&cq->lock, flags); -} - -/** - * hfi1_poll_cq - poll for work completion entries - * @ibcq: the completion queue to poll - * @num_entries: the maximum number of entries to return - * @entry: pointer to array where work completions are placed - * - * Returns the number of completion entries polled. - * - * This may be called from interrupt context. Also called by ib_poll_cq() - * in the generic verbs code. - */ -int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) -{ - struct hfi1_cq *cq = to_icq(ibcq); - struct hfi1_cq_wc *wc; - unsigned long flags; - int npolled; - u32 tail; - - /* The kernel can only poll a kernel completion queue */ - if (cq->ip) { - npolled = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&cq->lock, flags); - - wc = cq->queue; - tail = wc->tail; - if (tail > (u32) cq->ibcq.cqe) - tail = (u32) cq->ibcq.cqe; - for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (tail == wc->head) - break; - /* The kernel doesn't need a RMB since it has the lock. */ - *entry = wc->kqueue[tail]; - if (tail >= cq->ibcq.cqe) - tail = 0; - else - tail++; - } - wc->tail = tail; - - spin_unlock_irqrestore(&cq->lock, flags); - -bail: - return npolled; -} - -static void send_complete(struct kthread_work *work) -{ - struct hfi1_cq *cq = container_of(work, struct hfi1_cq, comptask); - - /* - * The completion handler will most likely rearm the notification - * and poll for all pending entries. If a new completion entry - * is added while we are in this routine, queue_work() - * won't call us again until we return so we check triggered to - * see if we need to call the handler again. - */ - for (;;) { - u8 triggered = cq->triggered; - - /* - * IPoIB connected mode assumes the callback is from a - * soft IRQ. We simulate this by blocking "bottom halves". - * See the implementation for ipoib_cm_handle_tx_wc(), - * netif_tx_lock_bh() and netif_tx_lock(). - */ - local_bh_disable(); - cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); - local_bh_enable(); - - if (cq->triggered == triggered) - return; - } -} - -/** - * hfi1_create_cq - create a completion queue - * @ibdev: the device this completion queue is attached to - * @attr: creation attributes - * @context: unused by the driver - * @udata: user data for libibverbs.so - * - * Returns a pointer to the completion queue or negative errno values - * for failure. - * - * Called by ib_create_cq() in the generic verbs code. - */ -struct ib_cq *hfi1_create_cq( - struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - struct hfi1_ibdev *dev = to_idev(ibdev); - struct hfi1_cq *cq; - struct hfi1_cq_wc *wc; - struct ib_cq *ret; - u32 sz; - unsigned int entries = attr->cqe; - - if (attr->flags) - return ERR_PTR(-EINVAL); - - if (entries < 1 || entries > hfi1_max_cqes) - return ERR_PTR(-EINVAL); - - /* Allocate the completion queue structure. 
*/ - cq = kmalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); - - /* - * Allocate the completion queue entries and head/tail pointers. - * This is allocated separately so that it can be resized and - * also mapped into user space. - * We need to use vmalloc() in order to support mmap and large - * numbers of entries. - */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (entries + 1); - else - sz += sizeof(struct ib_wc) * (entries + 1); - wc = vmalloc_user(sz); - if (!wc) { - ret = ERR_PTR(-ENOMEM); - goto bail_cq; - } - - /* - * Return the address of the WC as the offset to mmap. - * See hfi1_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - int err; - - cq->ip = rvt_create_mmap_info(&dev->rdi, sz, context, wc); - if (!cq->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wc; - } - - err = ib_copy_to_udata(udata, &cq->ip->offset, - sizeof(cq->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else - cq->ip = NULL; - - spin_lock(&dev->n_cqs_lock); - if (dev->n_cqs_allocated == hfi1_max_cqs) { - spin_unlock(&dev->n_cqs_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_cqs_allocated++; - spin_unlock(&dev->n_cqs_lock); - - if (cq->ip) { - spin_lock_irq(&dev->rdi.pending_lock); - list_add(&cq->ip->pending_mmaps, &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - - /* - * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. - * The number of entries should be >= the number requested or return - * an error. - */ - cq->dd = dd_from_dev(dev); - cq->ibcq.cqe = entries; - cq->notify = IB_CQ_NONE; - cq->triggered = 0; - spin_lock_init(&cq->lock); - init_kthread_work(&cq->comptask, send_complete); - wc->head = 0; - wc->tail = 0; - cq->queue = wc; - - ret = &cq->ibcq; - - goto done; - -bail_ip: - kfree(cq->ip); -bail_wc: - vfree(wc); -bail_cq: - kfree(cq); -done: - return ret; -} - -/** - * hfi1_destroy_cq - destroy a completion queue - * @ibcq: the completion queue to destroy. - * - * Returns 0 for success. - * - * Called by ib_destroy_cq() in the generic verbs code. - */ -int hfi1_destroy_cq(struct ib_cq *ibcq) -{ - struct hfi1_ibdev *dev = to_idev(ibcq->device); - struct hfi1_cq *cq = to_icq(ibcq); - - flush_kthread_work(&cq->comptask); - spin_lock(&dev->n_cqs_lock); - dev->n_cqs_allocated--; - spin_unlock(&dev->n_cqs_lock); - if (cq->ip) - kref_put(&cq->ip->ref, rvt_release_mmap_info); - else - vfree(cq->queue); - kfree(cq); - - return 0; -} - -/** - * hfi1_req_notify_cq - change the notification type for a completion queue - * @ibcq: the completion queue - * @notify_flags: the type of notification to request - * - * Returns 0 for success. - * - * This may be called from interrupt context. Also called by - * ib_req_notify_cq() in the generic verbs code. - */ -int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) -{ - struct hfi1_cq *cq = to_icq(ibcq); - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&cq->lock, flags); - /* - * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow - * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). 
- */ - if (cq->notify != IB_CQ_NEXT_COMP) - cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; - - if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && - cq->queue->head != cq->queue->tail) - ret = 1; - - spin_unlock_irqrestore(&cq->lock, flags); - - return ret; -} - -/** - * hfi1_resize_cq - change the size of the CQ - * @ibcq: the completion queue - * - * Returns 0 for success. - */ -int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) -{ - struct hfi1_cq *cq = to_icq(ibcq); - struct hfi1_cq_wc *old_wc; - struct hfi1_cq_wc *wc; - u32 head, tail, n; - int ret; - u32 sz; - - if (cqe < 1 || cqe > hfi1_max_cqes) { - ret = -EINVAL; - goto bail; - } - - /* - * Need to use vmalloc() if we want to support large #s of entries. - */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); - else - sz += sizeof(struct ib_wc) * (cqe + 1); - wc = vmalloc_user(sz); - if (!wc) { - ret = -ENOMEM; - goto bail; - } - - /* Check that we can write the offset to mmap. */ - if (udata && udata->outlen >= sizeof(__u64)) { - __u64 offset = 0; - - ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (ret) - goto bail_free; - } - - spin_lock_irq(&cq->lock); - /* - * Make sure head and tail are sane since they - * might be user writable. - */ - old_wc = cq->queue; - head = old_wc->head; - if (head > (u32) cq->ibcq.cqe) - head = (u32) cq->ibcq.cqe; - tail = old_wc->tail; - if (tail > (u32) cq->ibcq.cqe) - tail = (u32) cq->ibcq.cqe; - if (head < tail) - n = cq->ibcq.cqe + 1 + head - tail; - else - n = head - tail; - if (unlikely((u32)cqe < n)) { - ret = -EINVAL; - goto bail_unlock; - } - for (n = 0; tail != head; n++) { - if (cq->ip) - wc->uqueue[n] = old_wc->uqueue[tail]; - else - wc->kqueue[n] = old_wc->kqueue[tail]; - if (tail == (u32) cq->ibcq.cqe) - tail = 0; - else - tail++; - } - cq->ibcq.cqe = cqe; - wc->head = n; - wc->tail = 0; - cq->queue = wc; - spin_unlock_irq(&cq->lock); - - vfree(old_wc); - - if (cq->ip) { - struct hfi1_ibdev *dev = to_idev(ibcq->device); - struct rvt_mmap_info *ip = cq->ip; - - rvt_update_mmap_info(&dev->rdi, ip, sz, wc); - - /* - * Return the offset to mmap. - * See hfi1_mmap() for details. 
- */ - if (udata && udata->outlen >= sizeof(__u64)) { - ret = ib_copy_to_udata(udata, &ip->offset, - sizeof(ip->offset)); - if (ret) - goto bail; - } - - spin_lock_irq(&dev->rdi.pending_lock); - if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - - ret = 0; - goto bail; - -bail_unlock: - spin_unlock_irq(&cq->lock); -bail_free: - vfree(wc); -bail: - return ret; -} - -int hfi1_cq_init(struct hfi1_devdata *dd) -{ - int ret = 0; - int cpu; - struct task_struct *task; - - if (dd->worker) - return 0; - dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL); - if (!dd->worker) - return -ENOMEM; - init_kthread_worker(dd->worker); - task = kthread_create_on_node( - kthread_worker_fn, - dd->worker, - dd->assigned_node_id, - "hfi1_cq%d", dd->unit); - if (IS_ERR(task)) - goto task_fail; - cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id)); - kthread_bind(task, cpu); - wake_up_process(task); -out: - return ret; -task_fail: - ret = PTR_ERR(task); - kfree(dd->worker); - dd->worker = NULL; - goto out; -} - -void hfi1_cq_exit(struct hfi1_devdata *dd) -{ - struct kthread_worker *worker; - - worker = dd->worker; - if (!worker) - return; - /* blocks future queuing from send_complete() */ - dd->worker = NULL; - smp_wmb(); /* See hfi1_cq_enter */ - flush_kthread_worker(worker); - kthread_stop(worker->task); - kfree(worker); -} diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index e5f3451..e6a5fed 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1009,8 +1009,6 @@ struct hfi1_devdata { u16 psxmitwait_check_rate; /* high volume overflow errors deferred to tasklet */ struct tasklet_struct error_tasklet; - /* per device cq worker */ - struct kthread_worker *worker; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index b4076b2..d1cb2c8 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -765,7 +765,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) /* enable chip even if we have an error, so we can debug cause */ enable_chip(dd); - ret = hfi1_cq_init(dd); done: /* * Set status even if port serdes is not initialized @@ -1312,7 +1311,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd) kfree(dd->boardname); vfree(dd->events); vfree(dd->status); - hfi1_cq_exit(dd); } /* diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index a336d2a..5e50dea 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -304,7 +304,7 @@ int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err) if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; wc.status = err; - hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; @@ -327,7 +327,7 @@ int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err) wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; if (++tail >= qp->r_rq.size) tail = 0; - hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } wq->tail = tail; diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index bd504de..e80a092 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -1040,7 +1040,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct 
hfi1_ib_header *hdr) wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; wc.byte_len = wqe->length; wc.qp = &qp->ibqp; - hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); } if (++qp->s_last >= qp->s_size) qp->s_last = 0; @@ -1097,7 +1097,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; wc.byte_len = wqe->length; wc.qp = &qp->ibqp; - hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); } if (++qp->s_last >= qp->s_size) qp->s_last = 0; @@ -2157,8 +2157,8 @@ send_last: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (bth0 & IB_BTH_SOLICITED) != 0); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, + (bth0 & IB_BTH_SOLICITED) != 0); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 0b324b1..c659cf8 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -138,7 +138,7 @@ bad_lkey: wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; /* Signal solicited completion event. */ - hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); ret = 0; bail: return ret; @@ -566,8 +566,8 @@ again: wc.sl = qp->remote_ah_attr.sl; wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ - hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - wqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, + wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: spin_lock_irqsave(&sqp->s_lock, flags); @@ -909,8 +909,8 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, wc.qp = &qp->ibqp; if (status == IB_WC_SUCCESS) wc.byte_len = wqe->length; - hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); } last = qp->s_last; diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 0935182..75cded3 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -469,9 +469,9 @@ last_imm: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(IB_BTH_SOLICITED)) != 0); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(IB_BTH_SOLICITED)) != 0); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index a0e6222..a4746e8 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -247,8 +247,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ - hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - swqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, + swqe->wr.send_flags & IB_SEND_SOLICITED); ibp->rvp.n_loop_pkts++; bail_unlock: spin_unlock_irqrestore(&qp->r_lock, flags); @@ -878,9 +878,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. 
*/ - hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(IB_BTH_SOLICITED)) != 0); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(IB_BTH_SOLICITED)) != 0); return; drop: diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 70af487..ee969d0 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1719,7 +1719,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* Only need to initialize non-zero fields. */ - spin_lock_init(&dev->n_cqs_lock); spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); spin_lock_init(&dev->n_mcast_grps_lock); @@ -1816,11 +1815,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->post_send = post_send; ibdev->post_recv = post_receive; ibdev->post_srq_recv = hfi1_post_srq_receive; - ibdev->create_cq = hfi1_create_cq; - ibdev->destroy_cq = hfi1_destroy_cq; - ibdev->resize_cq = hfi1_resize_cq; - ibdev->poll_cq = hfi1_poll_cq; - ibdev->req_notify_cq = hfi1_req_notify_cq; + ibdev->create_cq = NULL; + ibdev->destroy_cq = NULL; + ibdev->resize_cq = NULL; + ibdev->poll_cq = NULL; + ibdev->req_notify_cq = NULL; ibdev->get_dma_mr = NULL; ibdev->reg_user_mr = NULL; ibdev->dereg_mr = NULL; @@ -1860,14 +1859,20 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; dd->verbs_dev.rdi.dparms.qpn_res_end = - dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; + dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; + /* completeion queue */ + snprintf(dd->verbs_dev.rdi.dparms.cq_name, + sizeof(dd->verbs_dev.rdi.dparms.cq_name), + "hfi1_cq%d", dd->unit); + dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; + /* misc settings */ - dd->verbs_dev.rdi.flags = RVT_FLAG_CQ_INIT_DRIVER; + dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */ dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index c22f0d1..ef8fb13 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -64,6 +64,7 @@ #include #include #include +#include struct hfi1_ctxtdata; struct hfi1_pportdata; @@ -81,12 +82,6 @@ struct hfi1_packet; */ #define HFI1_UVERBS_ABI_VERSION 2 -/* - * Define an ib_cq_notify value that is not valid so we know when CQ - * notifications are armed. - */ -#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) - #define IB_SEQ_NAK (3 << 29) /* AETH NAK opcode values */ @@ -236,35 +231,6 @@ struct hfi1_mcast { }; /* - * This structure is used to contain the head pointer, tail pointer, - * and completion queue entries as a single memory allocation so - * it can be mmap'ed into user space. - */ -struct hfi1_cq_wc { - u32 head; /* index of next entry to fill */ - u32 tail; /* index of next ib_poll_cq() entry */ - union { - /* these are actually size ibcq.cqe + 1 */ - struct ib_uverbs_wc uqueue[0]; - struct ib_wc kqueue[0]; - }; -}; - -/* - * The completion queue structure. 
- */ -struct hfi1_cq { - struct ib_cq ibcq; - struct kthread_work comptask; - struct hfi1_devdata *dd; - spinlock_t lock; /* protect changes in this struct */ - u8 notify; - u8 triggered; - struct hfi1_cq_wc *queue; - struct rvt_mmap_info *ip; -}; - -/* * hfi1 specific data structures that will be hidden from rvt after the queue * pair is made common */ @@ -363,8 +329,6 @@ struct hfi1_ibdev { u64 n_kmem_wait; u64 n_send_schedule; - u32 n_cqs_allocated; /* number of CQs allocated for device */ - spinlock_t n_cqs_lock; u32 n_qps_allocated; /* number of QPs allocated for device */ spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ @@ -395,11 +359,6 @@ struct hfi1_verbs_counters { u32 vl15_dropped; }; -static inline struct hfi1_cq *to_icq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct hfi1_cq, ibcq); -} - static inline struct rvt_qp *to_iqp(struct ib_qp *ibqp) { return container_of(ibqp, struct rvt_qp, ibqp); @@ -563,28 +522,6 @@ int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int hfi1_destroy_srq(struct ib_srq *ibsrq); -int hfi1_cq_init(struct hfi1_devdata *dd); - -void hfi1_cq_exit(struct hfi1_devdata *dd); - -void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int sig); - -int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); - -struct ib_cq *hfi1_create_cq( - struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); - -int hfi1_destroy_cq(struct ib_cq *ibcq); - -int hfi1_req_notify_cq( - struct ib_cq *ibcq, - enum ib_cq_notify_flags notify_flags); - -int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); - static inline void hfi1_put_ss(struct rvt_sge_state *ss) { while (ss->num_sge) { -- cgit v0.10.2 From 6366dfa6185ebb3709a2a05b64760d7c3abdfd95 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:28 -0800 Subject: staging/rdma/hfi1: Clean up return handling Return directly from rvt_resize_cq rather than use a goto/label. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 055aa71..0e6dbe5 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -432,7 +432,7 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) ret = ib_copy_to_udata(udata, &ip->offset, sizeof(ip->offset)); if (ret) - goto bail; + return ret; } spin_lock_irq(&rdi->pending_lock); @@ -447,9 +447,7 @@ bail_unlock: spin_unlock_irq(&cq->lock); bail_free: vfree(wc); -bail: return ret; - } /** -- cgit v0.10.2 From 83693bd146063e6843efafbedf302014511fee25 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:33 -0800 Subject: staging/rdma/hfi1: Use rdmavt version of post_send This patch removes the post_send and post_one_send from the hfi1 driver. The "posting" of sends will be done by rdmavt which will walk a WQE and queue work. This patch will still provide the capability to schedule that work as well as kick the progress. These are provided to the rdmavt layer. 
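Concretely, the hand-off is visible further down in the verbs.c hunk of this patch: hfi1 stops providing its own post_send and instead registers only its progress hooks with rdmavt. The lines below are excerpted from that hunk, with explanatory comments added here:

    ibdev->post_send = NULL;                                        /* rdmavt now supplies the post_send verb */
    dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;              /* called by rdmavt to run send progress directly */
    dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;  /* called by rdmavt to queue send progress as deferred work */

rdmavt walks and queues the posted WQEs itself and then calls back into these hooks to kick or schedule hfi1's send engine.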
Reviewed-by: Jubin John Signed-off-by: Dean Luick Signed-off-by: Harish Chegondi Signed-off-by: Edward Mascarenhas Signed-off-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index ec2286a..d57c08f 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -335,8 +335,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, spin_lock_irqsave(&qp->r_lock, flags); /* Check for valid receive state. */ - if (!(ib_hfi1_state_ops[qp->state] & - HFI1_PROCESS_RECV_OK)) { + if (!(ib_rvt_state_ops[qp->state] & + RVT_PROCESS_RECV_OK)) { ibp->rvp.n_pkt_drops++; } @@ -790,8 +790,8 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) qp->r_flags &= ~RVT_R_RSP_SEND; spin_lock_irqsave(&qp->s_lock, flags); - if (ib_hfi1_state_ops[qp->state] & - HFI1_PROCESS_OR_FLUSH_SEND) + if (ib_rvt_state_ops[qp->state] & + RVT_PROCESS_OR_FLUSH_SEND) hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 5e50dea..ff27f1a 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -208,7 +208,7 @@ static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) if (clr_sends) { while (qp->s_last != qp->s_head) { - struct rvt_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); unsigned i; for (i = 0; i < wqe->wr.num_sge; i++) { @@ -411,7 +411,7 @@ int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct hfi1_ibdev *dev = to_idev(ibqp->device); - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct hfi1_qp_priv *priv = qp->priv; enum ib_qp_state cur_state, new_state; struct ib_event ev; @@ -710,7 +710,7 @@ bail: int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); attr->qp_state = qp->state; attr->cur_qp_state = attr->qp_state; @@ -829,7 +829,7 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) */ int hfi1_destroy_qp(struct ib_qp *ibqp) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct hfi1_ibdev *dev = to_idev(ibqp->device); struct hfi1_qp_priv *priv = qp->priv; @@ -943,7 +943,7 @@ static int iowait_sleep( priv = qp->priv; spin_lock_irqsave(&qp->s_lock, flags); - if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) { + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { /* * If we couldn't queue the DMA request, save the info @@ -1117,7 +1117,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) struct sdma_engine *sde; sde = qp_to_sdma_engine(qp, priv->s_sc); - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); seq_printf(s, "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n", iter->n, @@ -1225,7 +1225,7 @@ void notify_qp_reset(struct rvt_qp *qp) iowait_init( &priv->s_iowait, 1, - hfi1_do_send, + _hfi1_do_send, iowait_sleep, iowait_wakeup); priv->r_adefered = 0; diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index e80a092..a30bf30 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -105,7 +105,7 
@@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, int middle = 0; /* Don't send an ACK if we aren't supposed to. */ - if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto bail; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -291,8 +291,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) make_rc_ack(dev, qp, ohdr, pmtu)) goto done; - if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) { - if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ if (qp->s_last == qp->s_head) @@ -303,7 +303,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) goto bail; } clear_ahg(qp); - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ? IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); /* will get called again */ @@ -323,10 +323,10 @@ int hfi1_make_rc_req(struct rvt_qp *qp) } /* Send a request. */ - wqe = get_swqe_ptr(qp, qp->s_cur); + wqe = rvt_get_swqe_ptr(qp, qp->s_cur); switch (qp->s_state) { default: - if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* * Resend an old request or start a new one. @@ -797,7 +797,7 @@ queue_ack: static void reset_psn(struct rvt_qp *qp, u32 psn) { u32 n = qp->s_acked; - struct rvt_swqe *wqe = get_swqe_ptr(qp, n); + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n); u32 opcode; qp->s_cur = n; @@ -820,7 +820,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) n = 0; if (n == qp->s_tail) break; - wqe = get_swqe_ptr(qp, n); + wqe = rvt_get_swqe_ptr(qp, n); diff = cmp_psn(psn, wqe->psn); if (diff < 0) break; @@ -882,7 +882,7 @@ done: */ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) { - struct rvt_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); struct hfi1_ibport *ibp; if (qp->s_retry == 0) { @@ -964,7 +964,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* Find the work request corresponding to the given PSN. 
*/ for (;;) { - wqe = get_swqe_ptr(qp, n); + wqe = rvt_get_swqe_ptr(qp, n); if (cmp_psn(psn, wqe->lpsn) <= 0) { if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_sending_psn = wqe->lpsn + 1; @@ -991,7 +991,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) u32 opcode; u32 psn; - if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; /* Find out where the BTH is */ @@ -1018,11 +1018,11 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) && - (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) + (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) start_timer(qp); while (qp->s_last != qp->s_acked) { - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; @@ -1132,7 +1132,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, if (++qp->s_cur >= qp->s_size) qp->s_cur = 0; qp->s_acked = qp->s_cur; - wqe = get_swqe_ptr(qp, qp->s_cur); + wqe = rvt_get_swqe_ptr(qp, qp->s_cur); if (qp->s_acked != qp->s_tail) { qp->s_state = OP(SEND_LAST); qp->s_psn = wqe->psn; @@ -1142,7 +1142,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qp->s_acked = 0; if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur) qp->s_draining = 0; - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); } return wqe; } @@ -1183,7 +1183,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, ack_psn = psn; if (aeth >> 29) ack_psn--; - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); ibp = to_iport(qp->ibqp.device, qp->port_num); /* @@ -1392,7 +1392,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, del_timer(&qp->s_timer); } - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); while (cmp_psn(psn, wqe->lpsn) > 0) { if (wqe->wr.opcode == IB_WR_RDMA_READ || @@ -1474,7 +1474,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, if (unlikely(qp->s_acked == qp->s_tail)) goto ack_done; - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); status = IB_WC_SUCCESS; switch (opcode) { @@ -1492,7 +1492,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; /* @@ -1557,7 +1557,7 @@ read_middle: * have to be careful to copy the data to the right * location. 
*/ - wqe = get_swqe_ptr(qp, qp->s_acked); + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, wqe, psn, pmtu); goto read_last; diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index c659cf8..b47e462 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -176,7 +176,7 @@ int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only) } spin_lock_irqsave(&rq->lock, flags); - if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ret = 0; goto unlock; } @@ -383,7 +383,7 @@ static void ruc_loopback(struct rvt_qp *sqp) /* Return if we are already busy processing a work request. */ if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || - !(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_OR_FLUSH_SEND)) + !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) goto unlock; sqp->s_flags |= RVT_S_BUSY; @@ -391,11 +391,11 @@ static void ruc_loopback(struct rvt_qp *sqp) again: if (sqp->s_last == sqp->s_head) goto clr_busy; - wqe = get_swqe_ptr(sqp, sqp->s_last); + wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); /* Return if it is not OK to start a new work request. */ - if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_NEXT_SEND_OK)) { - if (!(ib_hfi1_state_ops[sqp->state] & HFI1_FLUSH_SEND)) + if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) { + if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND)) goto clr_busy; /* We are in the error state, flush the work request. */ send_status = IB_WC_WR_FLUSH_ERR; @@ -413,7 +413,7 @@ again: } spin_unlock_irqrestore(&sqp->s_lock, flags); - if (!qp || !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) || + if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) || qp->ibqp.qp_type != sqp->ibqp.qp_type) { ibp->rvp.n_pkt_drops++; /* @@ -593,7 +593,7 @@ rnr_nak: if (sqp->s_rnr_retry_cnt < 7) sqp->s_rnr_retry--; spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_RECV_OK)) + if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK)) goto clr_busy; sqp->s_flags |= RVT_S_WAIT_RNR; sqp->s_timer.function = hfi1_rc_rnr_retry; @@ -802,6 +802,14 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, /* when sending, force a reschedule every one of these periods */ #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */ +void _hfi1_do_send(struct work_struct *work) +{ + struct iowait *wait = container_of(work, struct iowait, iowork); + struct rvt_qp *qp = iowait_to_qp(wait); + + hfi1_do_send(qp); +} + /** * hfi1_do_send - perform a send on a QP * @work: contains a pointer to the QP @@ -810,10 +818,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, * exhausted. Only allow one CPU to send a packet per QP (tasklet). * Otherwise, two threads could send packets out of order. 
*/ -void hfi1_do_send(struct work_struct *work) +void hfi1_do_send(struct rvt_qp *qp) { - struct iowait *wait = container_of(work, struct iowait, iowork); - struct rvt_qp *qp = iowait_to_qp(wait); struct hfi1_pkt_state ps; int (*make_req)(struct rvt_qp *qp); unsigned long flags; @@ -883,7 +889,7 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 old_last, last; unsigned i; - if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; for (i = 0; i < wqe->wr.num_sge; i++) { diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 75cded3..ec404ff 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -76,8 +76,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) { - if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ if (qp->s_last == qp->s_head) @@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp) goto bail; } clear_ahg(qp); - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; } @@ -98,12 +98,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) ohdr = &priv->s_hdr->ibh.u.l.oth; /* Get the next send request. */ - wqe = get_swqe_ptr(qp, qp->s_cur); + wqe = rvt_get_swqe_ptr(qp, qp->s_cur); qp->s_wqe = NULL; switch (qp->s_state) { default: - if (!(ib_hfi1_state_ops[qp->state] & - HFI1_PROCESS_NEXT_SEND_OK)) + if (!(ib_rvt_state_ops[qp->state] & + RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. */ if (qp->s_cur == qp->s_head) { diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index a4746e8..e2cbdc8 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -93,7 +93,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) IB_QPT_UD : qp->ibqp.qp_type; if (dqptype != sqptype || - !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) { + !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ibp->rvp.n_pkt_drops++; goto drop; } @@ -282,8 +282,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK)) { - if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND)) + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { + if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. 
*/ if (qp->s_last == qp->s_head) @@ -293,7 +293,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = rvt_get_swqe_ptr(qp, qp->s_last); hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; } @@ -301,7 +301,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) if (qp->s_cur == qp->s_head) goto bail; - wqe = get_swqe_ptr(qp, qp->s_cur); + wqe = rvt_get_swqe_ptr(qp, qp->s_cur); next_cur = qp->s_cur + 1; if (next_cur >= qp->s_size) next_cur = 0; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index ee969d0..b4cfda4 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -133,28 +133,6 @@ static void verbs_sdma_complete( #define TXREQ_NAME_LEN 24 /* - * Note that it is OK to post send work requests in the SQE and ERR - * states; hfi1_do_send() will process them and generate error - * completions as per IB 1.2 C10-96. - */ -const int ib_hfi1_state_ops[IB_QPS_ERR + 1] = { - [IB_QPS_RESET] = 0, - [IB_QPS_INIT] = HFI1_POST_RECV_OK, - [IB_QPS_RTR] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK, - [IB_QPS_RTS] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK | - HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK | - HFI1_PROCESS_NEXT_SEND_OK, - [IB_QPS_SQD] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK | - HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK, - [IB_QPS_SQE] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK | - HFI1_POST_SEND_OK | HFI1_FLUSH_SEND, - [IB_QPS_ERR] = HFI1_POST_RECV_OK | HFI1_FLUSH_RECV | - HFI1_POST_SEND_OK | HFI1_FLUSH_SEND, -}; - -static inline void _hfi1_schedule_send(struct rvt_qp *qp); - -/* * Translate ib_wr_opcode into ib_wc_opcode. */ const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { @@ -346,169 +324,6 @@ void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release) } /** - * post_one_send - post one RC, UC, or UD send work request - * @qp: the QP to post on - * @wr: the work request to send - */ -static int post_one_send(struct rvt_qp *qp, struct ib_send_wr *wr) -{ - struct rvt_swqe *wqe; - u32 next; - int i; - int j; - int acc; - struct rvt_lkey_table *rkt; - struct rvt_pd *pd; - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); - struct hfi1_pportdata *ppd; - struct hfi1_ibport *ibp; - - /* IB spec says that num_sge == 0 is OK. */ - if (unlikely(wr->num_sge > qp->s_max_sge)) - return -EINVAL; - - ppd = &dd->pport[qp->port_num - 1]; - ibp = &ppd->ibport_data; - - /* - * Don't allow RDMA reads or atomic operations on UC or - * undefined operations. - * Make sure buffer is large enough to hold the result for atomics. 
- */ - if (qp->ibqp.qp_type == IB_QPT_UC) { - if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) - return -EINVAL; - } else if (qp->ibqp.qp_type != IB_QPT_RC) { - /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ - if (wr->opcode != IB_WR_SEND && - wr->opcode != IB_WR_SEND_WITH_IMM) - return -EINVAL; - /* Check UD destination address PD */ - if (qp->ibqp.pd != ud_wr(wr)->ah->pd) - return -EINVAL; - } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) - return -EINVAL; - else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && - (wr->num_sge == 0 || - wr->sg_list[0].length < sizeof(u64) || - wr->sg_list[0].addr & (sizeof(u64) - 1))) - return -EINVAL; - else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) - return -EINVAL; - - next = qp->s_head + 1; - if (next >= qp->s_size) - next = 0; - if (next == qp->s_last) - return -ENOMEM; - - rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; - pd = ibpd_to_rvtpd(qp->ibqp.pd); - wqe = get_swqe_ptr(qp, qp->s_head); - - - if (qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_RC) - memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); - else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE || - wr->opcode == IB_WR_RDMA_READ) - memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); - else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); - else - memcpy(&wqe->wr, wr, sizeof(wqe->wr)); - - wqe->length = 0; - j = 0; - if (wr->num_sge) { - acc = wr->opcode >= IB_WR_RDMA_READ ? - IB_ACCESS_LOCAL_WRITE : 0; - for (i = 0; i < wr->num_sge; i++) { - u32 length = wr->sg_list[i].length; - int ok; - - if (length == 0) - continue; - ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], - &wr->sg_list[i], acc); - if (!ok) - goto bail_inval_free; - wqe->length += length; - j++; - } - wqe->wr.num_sge = j; - } - if (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) { - if (wqe->length > 0x80000000U) - goto bail_inval_free; - } else { - atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); - } - wqe->ssn = qp->s_ssn++; - qp->s_head = next; - - return 0; - -bail_inval_free: - /* release mr holds */ - while (j) { - struct rvt_sge *sge = &wqe->sg_list[--j]; - - rvt_put_mr(sge->mr); - } - return -EINVAL; -} - -/** - * post_send - post a send on a QP - * @ibqp: the QP to post the send on - * @wr: the list of work requests to post - * @bad_wr: the first bad WR is put here - * - * This may be called from interrupt context. - */ -static int post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) -{ - struct rvt_qp *qp = to_iqp(ibqp); - struct hfi1_qp_priv *priv = qp->priv; - int err = 0; - int call_send; - unsigned long flags; - unsigned nreq = 0; - - spin_lock_irqsave(&qp->s_lock, flags); - - /* Check that state is OK to post send. 
*/ - if (unlikely(!(ib_hfi1_state_ops[qp->state] & HFI1_POST_SEND_OK))) { - spin_unlock_irqrestore(&qp->s_lock, flags); - return -EINVAL; - } - - /* sq empty and not list -> call send */ - call_send = qp->s_head == qp->s_last && !wr->next; - - for (; wr; wr = wr->next) { - err = post_one_send(qp, wr); - if (unlikely(err)) { - *bad_wr = wr; - goto bail; - } - nreq++; - } -bail: - spin_unlock_irqrestore(&qp->s_lock, flags); - if (nreq && !call_send) - _hfi1_schedule_send(qp); - if (nreq && call_send) - hfi1_do_send(&priv->s_iowait.iowork); - return err; -} - -/** * post_receive - post a receive on a QP * @ibqp: the QP to post the receive on * @wr: the WR to post @@ -519,13 +334,13 @@ bail: static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_rwq *wq = qp->r_rq.wq; unsigned long flags; int ret; /* Check that state is OK to post receive. */ - if (!(ib_hfi1_state_ops[qp->state] & HFI1_POST_RECV_OK) || !wq) { + if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { *bad_wr = wr; ret = -EINVAL; goto bail; @@ -576,7 +391,7 @@ static inline int qp_ok(int opcode, struct hfi1_packet *packet) { struct hfi1_ibport *ibp; - if (!(ib_hfi1_state_ops[packet->qp->state] & HFI1_PROCESS_RECV_OK)) + if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) goto dropit; if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) || (opcode == IB_OPCODE_CNP)) @@ -737,7 +552,7 @@ static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, if (!tx) { spin_lock_irqsave(&qp->s_lock, flags); write_seqlock(&dev->iowait_lock); - if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK && + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK && list_empty(&priv->s_iowait.list)) { dev->n_txwait++; qp->s_flags |= RVT_S_WAIT_TX; @@ -855,7 +670,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, struct rvt_qp *qp) int ret = 0; spin_lock_irqsave(&qp->s_lock, flags); - if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) { + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { write_seqlock(&dev->iowait_lock); if (list_empty(&priv->s_iowait.list)) { if (list_empty(&dev->memwait)) @@ -1085,7 +900,7 @@ static int no_bufs_available(struct rvt_qp *qp, struct send_context *sc) * enabling the PIO avail interrupt. 
*/ spin_lock_irqsave(&qp->s_lock, flags); - if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) { + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { write_seqlock(&dev->iowait_lock); if (list_empty(&priv->s_iowait.list)) { struct hfi1_ibdev *dev = &dd->verbs_dev; @@ -1812,7 +1627,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_qp = hfi1_modify_qp; ibdev->query_qp = hfi1_query_qp; ibdev->destroy_qp = hfi1_destroy_qp; - ibdev->post_send = post_send; + ibdev->post_send = NULL; ibdev->post_recv = post_receive; ibdev->post_srq_recv = hfi1_post_srq_receive; ibdev->create_cq = NULL; @@ -1864,6 +1679,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; + dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; + dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; /* completeion queue */ snprintf(dd->verbs_dev.rdi.dparms.cq_name, diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index ef8fb13..8e032a7 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -92,17 +92,6 @@ struct hfi1_packet; #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 #define IB_NAK_INVALID_RD_REQUEST 0x64 -/* Flags for checking QP state (see ib_hfi1_state_ops[]) */ -#define HFI1_POST_SEND_OK 0x01 -#define HFI1_POST_RECV_OK 0x02 -#define HFI1_PROCESS_RECV_OK 0x04 -#define HFI1_PROCESS_SEND_OK 0x08 -#define HFI1_PROCESS_NEXT_SEND_OK 0x10 -#define HFI1_FLUSH_SEND 0x20 -#define HFI1_FLUSH_RECV 0x40 -#define HFI1_PROCESS_OR_FLUSH_SEND \ - (HFI1_PROCESS_SEND_OK | HFI1_FLUSH_SEND) - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -256,19 +245,6 @@ struct hfi1_pkt_state { #define HFI1_PSN_CREDIT 16 /* - * Since struct rvt_swqe is not a fixed size, we can't simply index into - * struct hfi1_qp.s_wq. This function does the array index computation. - */ -static inline struct rvt_swqe *get_swqe_ptr(struct rvt_qp *qp, - unsigned n) -{ - return (struct rvt_swqe *)((char *)qp->s_wq + - (sizeof(struct rvt_swqe) + - qp->s_max_sge * - sizeof(struct rvt_sge)) * n); -} - -/* * Since struct rvt_rwqe is not a fixed size, we can't simply index into * struct rvt_rwq.wq. This function does the array index computation. 
*/ @@ -359,11 +335,6 @@ struct hfi1_verbs_counters { u32 vl15_dropped; }; -static inline struct rvt_qp *to_iqp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct rvt_qp, ibqp); -} - static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev) { struct rvt_dev_info *rdi; @@ -544,7 +515,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, u32 bth0, u32 bth2, int middle); -void hfi1_do_send(struct work_struct *work); +void _hfi1_do_send(struct work_struct *work); + +void hfi1_do_send(struct rvt_qp *qp); void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); @@ -577,7 +550,7 @@ extern const enum ib_wc_opcode ib_hfi1_wc_opcode[]; extern const u8 hdr_len_by_opcode[]; -extern const int ib_hfi1_state_ops[]; +extern const int ib_rvt_state_ops[]; extern __be64 ib_hfi1_sys_image_guid; /* in network order */ diff --git a/drivers/staging/rdma/hfi1/verbs_mcast.c b/drivers/staging/rdma/hfi1/verbs_mcast.c index aa3f560..175396b 100644 --- a/drivers/staging/rdma/hfi1/verbs_mcast.c +++ b/drivers/staging/rdma/hfi1/verbs_mcast.c @@ -241,7 +241,7 @@ bail: int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct hfi1_ibdev *dev = to_idev(ibqp->device); struct hfi1_ibport *ibp; struct hfi1_mcast *mcast; @@ -299,7 +299,7 @@ bail: int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - struct rvt_qp *qp = to_iqp(ibqp); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct hfi1_ibdev *dev = to_idev(ibqp->device); struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct hfi1_mcast *mcast = NULL; -- cgit v0.10.2 From 0facc5a1635252a45ab2fdb119309a3c24e9be82 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:39 -0800 Subject: staging/rdma/hfi1: Remove multicast verbs functions Multicast is now supported by rdmavt. Remove the verbs multicast functions and use that. 
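Concretely, the switch-over appears in the diffs below: the receive path now resolves multicast GIDs through rdmavt, and the attach/detach verbs are left NULL so rdmavt installs its own handlers. Excerpted from the verbs.c hunks, with comments added here:

    mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);   /* rdmavt owns the per-port multicast GID tree */

    ibdev->attach_mcast = NULL;   /* rdmavt provides attach_mcast */
    ibdev->detach_mcast = NULL;   /* rdmavt provides detach_mcast */

The hfi1-private rb-tree implementation in verbs_mcast.c is then deleted outright.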
Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 7797f2c..0069796 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := chip.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ init.o intr.o mad.o pcie.o pio.o pio_copy.o \ qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \ - uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs_mcast.o verbs.o + uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o CFLAGS_trace.o = -I$(src) diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index ff27f1a..748a3a7 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -1205,8 +1205,6 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) for (n = 0; n < dd->num_pports; n++) { struct hfi1_ibport *ibp = &dd->pport[n].ibport_data; - if (!hfi1_mcast_tree_empty(ibp)) - qp_inuse++; rcu_read_lock(); if (rcu_dereference(ibp->rvp.qp[0])) qp_inuse++; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index b4cfda4..2fed284 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -451,12 +451,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) lid = be16_to_cpu(hdr->lrh[1]); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { - struct hfi1_mcast *mcast; - struct hfi1_mcast_qp *p; + struct rvt_mcast *mcast; + struct rvt_mcast_qp *p; if (lnh != HFI1_LRH_GRH) goto drop; - mcast = hfi1_mcast_find(ibp, &hdr->u.l.grh.dgid); + mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid); if (mcast == NULL) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { @@ -467,7 +467,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) spin_unlock_irqrestore(&packet->qp->r_lock, flags); } /* - * Notify hfi1_multicast_detach() if it is waiting for us + * Notify rvt_multicast_detach() if it is waiting for us * to finish. */ if (atomic_dec_return(&mcast->refcount) <= 1) @@ -1536,7 +1536,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); - spin_lock_init(&dev->n_mcast_grps_lock); init_timer(&dev->mem_timer); dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; @@ -1644,8 +1643,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->map_phys_fmr = NULL; ibdev->unmap_fmr = NULL; ibdev->dealloc_fmr = NULL; - ibdev->attach_mcast = hfi1_multicast_attach; - ibdev->detach_mcast = hfi1_multicast_detach; + ibdev->attach_mcast = NULL; + ibdev->detach_mcast = NULL; ibdev->process_mad = hfi1_process_mad; ibdev->mmap = NULL; ibdev->dma_ops = NULL; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 8e032a7..8e82cf0 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -201,25 +201,6 @@ struct tx_pio_header { } ____cacheline_aligned; /* - * There is one struct hfi1_mcast for each multicast GID. - * All attached QPs are then stored as a list of - * struct hfi1_mcast_qp. 
- */ -struct hfi1_mcast_qp { - struct list_head list; - struct rvt_qp *qp; -}; - -struct hfi1_mcast { - struct rb_node rb_node; - union ib_gid mgid; - struct list_head qp_list; - wait_queue_head_t wait; - atomic_t refcount; - int n_attached; -}; - -/* * hfi1 specific data structures that will be hidden from rvt after the queue * pair is made common */ @@ -309,8 +290,6 @@ struct hfi1_ibdev { spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ spinlock_t n_srqs_lock; - u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ - spinlock_t n_mcast_grps_lock; #ifdef CONFIG_DEBUG_FS /* per HFI debugfs */ struct dentry *hfi1_ibdev_dbg; @@ -434,14 +413,6 @@ static inline u32 delta_psn(u32 a, u32 b) return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT; } -struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid); - -int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); - -int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); - -int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp); - struct verbs_txreq; void hfi1_put_txreq(struct verbs_txreq *tx); diff --git a/drivers/staging/rdma/hfi1/verbs_mcast.c b/drivers/staging/rdma/hfi1/verbs_mcast.c deleted file mode 100644 index 175396b..0000000 --- a/drivers/staging/rdma/hfi1/verbs_mcast.c +++ /dev/null @@ -1,385 +0,0 @@ -/* - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include - -#include "hfi.h" - -/** - * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct - * @qp: the QP to link - */ -static struct hfi1_mcast_qp *mcast_qp_alloc(struct rvt_qp *qp) -{ - struct hfi1_mcast_qp *mqp; - - mqp = kmalloc(sizeof(*mqp), GFP_KERNEL); - if (!mqp) - goto bail; - - mqp->qp = qp; - atomic_inc(&qp->refcount); - -bail: - return mqp; -} - -static void mcast_qp_free(struct hfi1_mcast_qp *mqp) -{ - struct rvt_qp *qp = mqp->qp; - - /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - - kfree(mqp); -} - -/** - * mcast_alloc - allocate the multicast GID structure - * @mgid: the multicast GID - * - * A list of QPs will be attached to this structure. - */ -static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid) -{ - struct hfi1_mcast *mcast; - - mcast = kmalloc(sizeof(*mcast), GFP_KERNEL); - if (!mcast) - goto bail; - - mcast->mgid = *mgid; - INIT_LIST_HEAD(&mcast->qp_list); - init_waitqueue_head(&mcast->wait); - atomic_set(&mcast->refcount, 0); - mcast->n_attached = 0; - -bail: - return mcast; -} - -static void mcast_free(struct hfi1_mcast *mcast) -{ - struct hfi1_mcast_qp *p, *tmp; - - list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) - mcast_qp_free(p); - - kfree(mcast); -} - -/** - * hfi1_mcast_find - search the global table for the given multicast GID - * @ibp: the IB port structure - * @mgid: the multicast GID to search for - * - * Returns NULL if not found. - * - * The caller is responsible for decrementing the reference count if found. - */ -struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid) -{ - struct rb_node *n; - unsigned long flags; - struct hfi1_mcast *mcast; - - spin_lock_irqsave(&ibp->rvp.lock, flags); - n = ibp->rvp.mcast_tree.rb_node; - while (n) { - int ret; - - mcast = rb_entry(n, struct hfi1_mcast, rb_node); - - ret = memcmp(mgid->raw, mcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) - n = n->rb_left; - else if (ret > 0) - n = n->rb_right; - else { - atomic_inc(&mcast->refcount); - spin_unlock_irqrestore(&ibp->rvp.lock, flags); - goto bail; - } - } - spin_unlock_irqrestore(&ibp->rvp.lock, flags); - - mcast = NULL; - -bail: - return mcast; -} - -/** - * mcast_add - insert mcast GID into table and attach QP struct - * @mcast: the mcast GID table - * @mqp: the QP to attach - * - * Return zero if both were added. Return EEXIST if the GID was already in - * the table but the QP was added. Return ESRCH if the QP was already - * attached and neither structure was added. 
- */ -static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp, - struct hfi1_mcast *mcast, struct hfi1_mcast_qp *mqp) -{ - struct rb_node **n = &ibp->rvp.mcast_tree.rb_node; - struct rb_node *pn = NULL; - int ret; - - spin_lock_irq(&ibp->rvp.lock); - - while (*n) { - struct hfi1_mcast *tmcast; - struct hfi1_mcast_qp *p; - - pn = *n; - tmcast = rb_entry(pn, struct hfi1_mcast, rb_node); - - ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) { - n = &pn->rb_left; - continue; - } - if (ret > 0) { - n = &pn->rb_right; - continue; - } - - /* Search the QP list to see if this is already there. */ - list_for_each_entry_rcu(p, &tmcast->qp_list, list) { - if (p->qp == mqp->qp) { - ret = ESRCH; - goto bail; - } - } - if (tmcast->n_attached == hfi1_max_mcast_qp_attached) { - ret = ENOMEM; - goto bail; - } - - tmcast->n_attached++; - - list_add_tail_rcu(&mqp->list, &tmcast->qp_list); - ret = EEXIST; - goto bail; - } - - spin_lock(&dev->n_mcast_grps_lock); - if (dev->n_mcast_grps_allocated == hfi1_max_mcast_grps) { - spin_unlock(&dev->n_mcast_grps_lock); - ret = ENOMEM; - goto bail; - } - - dev->n_mcast_grps_allocated++; - spin_unlock(&dev->n_mcast_grps_lock); - - mcast->n_attached++; - - list_add_tail_rcu(&mqp->list, &mcast->qp_list); - - atomic_inc(&mcast->refcount); - rb_link_node(&mcast->rb_node, pn, n); - rb_insert_color(&mcast->rb_node, &ibp->rvp.mcast_tree); - - ret = 0; - -bail: - spin_unlock_irq(&ibp->rvp.lock); - - return ret; -} - -int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct hfi1_ibdev *dev = to_idev(ibqp->device); - struct hfi1_ibport *ibp; - struct hfi1_mcast *mcast; - struct hfi1_mcast_qp *mqp; - int ret; - - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { - ret = -EINVAL; - goto bail; - } - - /* - * Allocate data structures since its better to do this outside of - * spin locks and it will most likely be needed. - */ - mcast = mcast_alloc(gid); - if (mcast == NULL) { - ret = -ENOMEM; - goto bail; - } - mqp = mcast_qp_alloc(qp); - if (mqp == NULL) { - mcast_free(mcast); - ret = -ENOMEM; - goto bail; - } - ibp = to_iport(ibqp->device, qp->port_num); - switch (mcast_add(dev, ibp, mcast, mqp)) { - case ESRCH: - /* Neither was used: OK to attach the same QP twice. */ - mcast_qp_free(mqp); - mcast_free(mcast); - break; - - case EEXIST: /* The mcast wasn't used */ - mcast_free(mcast); - break; - - case ENOMEM: - /* Exceeded the maximum number of mcast groups. */ - mcast_qp_free(mqp); - mcast_free(mcast); - ret = -ENOMEM; - goto bail; - - default: - break; - } - - ret = 0; - -bail: - return ret; -} - -int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct hfi1_ibdev *dev = to_idev(ibqp->device); - struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num); - struct hfi1_mcast *mcast = NULL; - struct hfi1_mcast_qp *p, *tmp; - struct rb_node *n; - int last = 0; - int ret; - - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { - ret = -EINVAL; - goto bail; - } - - spin_lock_irq(&ibp->rvp.lock); - - /* Find the GID in the mcast table. 
*/ - n = ibp->rvp.mcast_tree.rb_node; - while (1) { - if (n == NULL) { - spin_unlock_irq(&ibp->rvp.lock); - ret = -EINVAL; - goto bail; - } - - mcast = rb_entry(n, struct hfi1_mcast, rb_node); - ret = memcmp(gid->raw, mcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) - n = n->rb_left; - else if (ret > 0) - n = n->rb_right; - else - break; - } - - /* Search the QP list. */ - list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { - if (p->qp != qp) - continue; - /* - * We found it, so remove it, but don't poison the forward - * link until we are sure there are no list walkers. - */ - list_del_rcu(&p->list); - mcast->n_attached--; - - /* If this was the last attached QP, remove the GID too. */ - if (list_empty(&mcast->qp_list)) { - rb_erase(&mcast->rb_node, &ibp->rvp.mcast_tree); - last = 1; - } - break; - } - - spin_unlock_irq(&ibp->rvp.lock); - - if (p) { - /* - * Wait for any list walkers to finish before freeing the - * list element. - */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - mcast_qp_free(p); - } - if (last) { - atomic_dec(&mcast->refcount); - wait_event(mcast->wait, !atomic_read(&mcast->refcount)); - mcast_free(mcast); - spin_lock_irq(&dev->n_mcast_grps_lock); - dev->n_mcast_grps_allocated--; - spin_unlock_irq(&dev->n_mcast_grps_lock); - } - - ret = 0; - -bail: - return ret; -} - -int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp) -{ - return !ibp->rvp.mcast_tree.rb_node; -} -- cgit v0.10.2 From ec4274f1aeb5e5012c1e46ba11ceef7767af8b3d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:44 -0800 Subject: staging/rdma/hfi1: Remove modify queue pair from hfi1 In addition to removing the modify queue pair verb from hfi1 we also remove ancillary functions which existed only for modify queue pair and are also already present in hfi1. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e8d0da8..322de64 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1133,13 +1133,6 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_ah_attr = qp->alt_ah_attr; qp->port_num = qp->alt_ah_attr.port_num; qp->s_pkey_index = qp->s_alt_pkey_index; - - /* - * Ignored by drivers which do not support it. Not - * really worth creating a call back into the driver - * just to set a flag. 
- */ - qp->s_flags |= RVT_S_AHG_CLEAR; } } diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h index cb5ca79..dcf8edf 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/staging/rdma/hfi1/common.h @@ -345,7 +345,6 @@ struct hfi1_message_header { #define HFI1_AETH_CREDIT_MASK 0x1F #define HFI1_AETH_CREDIT_INVAL 0x1F #define HFI1_MSN_MASK 0xFFFFFF -#define HFI1_QPN_MASK 0xFFFFFF #define HFI1_FECN_SHIFT 31 #define HFI1_FECN_MASK 1 #define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT) diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index d57c08f..d848cc0 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -282,6 +282,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u32 rte = rhf_rcv_type_err(packet->rhf); int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; struct hfi1_ibport *ibp = &ppd->ibport_data; + struct hfi1_devdata *dd = ppd->dd; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; @@ -316,13 +318,13 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, goto drop; /* Get the destination QP number. */ - qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; + qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { struct rvt_qp *qp; unsigned long flags; rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, qp_num); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!qp) { rcu_read_unlock(); goto drop; @@ -397,9 +399,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, sc5 |= 0x10; sl = ibp->sc_to_sl[sc5]; - lqpn = be32_to_cpu(bth[1]) & HFI1_QPN_MASK; + lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, lqpn); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn); if (qp == NULL) { rcu_read_unlock(); goto drop; @@ -470,7 +472,7 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, case IB_QPT_GSI: case IB_QPT_UD: rlid = be16_to_cpu(hdr->lrh[3]); - rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK; + rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; svc_type = IB_CC_SVCTYPE_UD; break; case IB_QPT_UC: @@ -500,7 +502,7 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, if (bth1 & HFI1_BECN_SMASK) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 lqpn = bth1 & HFI1_QPN_MASK; + u32 lqpn = bth1 & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc5]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); @@ -599,6 +601,7 @@ static void prescan_rxq(struct hfi1_packet *packet) struct hfi1_ib_header *hdr; struct hfi1_other_headers *ohdr; struct ib_grh *grh = NULL; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; int is_ecn = 0; @@ -631,9 +634,9 @@ static void prescan_rxq(struct hfi1_packet *packet) if (!is_ecn) goto next; - qpn = bth1 & HFI1_QPN_MASK; + qpn = bth1 & RVT_QPN_MASK; rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, qpn); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn); if (qp == NULL) { rcu_read_unlock(); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 748a3a7..1e6ca4f 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include "hfi.h" #include "qp.h" @@ -115,230 +117,6 @@ static const u16 credit_table[31] = { 32768 /* 1E */ }; -static void free_qpn(struct 
rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} - -/* - * Put the QP into the hash table. - * The hash table holds a reference to the QP. - */ -static void insert_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) -{ - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - unsigned long flags; - - atomic_inc(&qp->refcount); - spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); - - if (qp->ibqp.qp_num <= 1) { - rcu_assign_pointer(ibp->rvp.qp[qp->ibqp.qp_num], qp); - } else { - u32 n = qpn_hash(dev->rdi.qp_dev, qp->ibqp.qp_num); - - qp->next = dev->rdi.qp_dev->qp_table[n]; - rcu_assign_pointer(dev->rdi.qp_dev->qp_table[n], qp); - trace_hfi1_qpinsert(qp, n); - } - - spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); -} - -/* - * Remove the QP from the table so it can't be found asynchronously by - * the receive interrupt routine. - */ -static void remove_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) -{ - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - u32 n = qpn_hash(dev->rdi.qp_dev, qp->ibqp.qp_num); - unsigned long flags; - int removed = 1; - - spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); - - if (rcu_dereference_protected(ibp->rvp.qp[0], - lockdep_is_held( - &dev->rdi.qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); - } else if (rcu_dereference_protected(ibp->rvp.qp[1], - lockdep_is_held(&dev->rdi.qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); - } else { - struct rvt_qp *q; - struct rvt_qp __rcu **qpp; - - removed = 0; - qpp = &dev->rdi.qp_dev->qp_table[n]; - for (; (q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->rdi.qp_dev->qpt_lock))) - != NULL; - qpp = &q->next) - if (q == qp) { - RCU_INIT_POINTER(*qpp, - rcu_dereference_protected(qp->next, - lockdep_is_held(&dev->rdi.qp_dev->qpt_lock))); - removed = 1; - trace_hfi1_qpremove(qp, n); - break; - } - } - - spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); - if (removed) { - synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } -} - -static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) -{ - unsigned n; - - if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - hfi1_put_ss(&qp->s_rdma_read_sge); - - hfi1_put_ss(&qp->r_sge); - - if (clr_sends) { - while (qp->s_last != qp->s_head) { - struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - unsigned i; - - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - } - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - } - - if (qp->ibqp.qp_type != IB_QPT_RC) - return; - - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { - struct rvt_ack_entry *e = &qp->s_ack_queue[n]; - - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - } -} - -/** - * hfi1_error_qp - put a QP into the error state - * @qp: the QP to put into the error state - * @err: the receive completion error to signal if a RWQE is active - * - * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. 
- * The QP r_lock and s_lock should be held and interrupts disabled. - * If we are already in error state, just return. - */ -int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err) -{ - struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); - struct hfi1_qp_priv *priv = qp->priv; - struct ib_wc wc; - int ret = 0; - - if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) - goto bail; - - qp->state = IB_QPS_ERR; - - if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); - } - - if (qp->s_flags & RVT_S_ANY_WAIT_SEND) - qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; - - write_seqlock(&dev->iowait_lock); - if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { - qp->s_flags &= ~RVT_S_ANY_WAIT_IO; - list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } - write_sequnlock(&dev->iowait_lock); - - if (!(qp->s_flags & RVT_S_BUSY)) { - qp->s_hdrwords = 0; - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - flush_tx_list(qp); - } - - /* Schedule the sending tasklet to drain the send work queue. */ - if (qp->s_last != qp->s_head) - hfi1_schedule_send(qp); - - clear_mr_refs(qp, 0); - - memset(&wc, 0, sizeof(wc)); - wc.qp = &qp->ibqp; - wc.opcode = IB_WC_RECV; - - if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { - wc.wr_id = qp->r_wr_id; - wc.status = err; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - } - wc.status = IB_WC_WR_FLUSH_ERR; - - if (qp->r_rq.wq) { - struct rvt_rwq *wq; - u32 head; - u32 tail; - - spin_lock(&qp->r_rq.lock); - - /* sanity check pointers before trusting them */ - wq = qp->r_rq.wq; - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - while (tail != head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; - if (++tail >= qp->r_rq.size) - tail = 0; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - } - wq->tail = tail; - - spin_unlock(&qp->r_rq.lock); - } else if (qp->ibqp.event_handler) - ret = 1; - -bail: - return ret; -} - static void flush_tx_list(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -397,314 +175,49 @@ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) return ib_mtu_enum_to_int(mtu); } - -/** - * hfi1_modify_qp - modify the attributes of a queue pair - * @ibqp: the queue pair who's attributes we're modifying - * @attr: the new attributes - * @attr_mask: the mask of attributes to modify - * @udata: user data for libibverbs.so - * - * Returns 0 on success, otherwise returns an errno. - */ -int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) +int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) { + struct ib_qp *ibqp = &qp->ibqp; struct hfi1_ibdev *dev = to_idev(ibqp->device); - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct hfi1_qp_priv *priv = qp->priv; - enum ib_qp_state cur_state, new_state; - struct ib_event ev; - int lastwqe = 0; - int mig = 0; - int ret; - u32 pmtu = 0; /* for gcc warning only */ struct hfi1_devdata *dd = dd_from_dev(dev); - - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_lock); - - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : qp->state; - new_state = attr_mask & IB_QP_STATE ? 
attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) - goto inval; + u8 sc; if (attr_mask & IB_QP_AV) { - u8 sc; - - if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; - if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) - goto inval; sc = ah_to_sc(ibqp->device, &attr->ah_attr); if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) - goto inval; + return -EINVAL; } if (attr_mask & IB_QP_ALT_PATH) { - u8 sc; - - if (attr->alt_ah_attr.dlid >= - be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; - if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) - goto inval; - if (attr->alt_pkey_index >= hfi1_get_npkeys(dd)) - goto inval; sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr); if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) - goto inval; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= hfi1_get_npkeys(dd)) - goto inval; - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - if (attr->min_rnr_timer > 31) - goto inval; - - if (attr_mask & IB_QP_PORT) - if (qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI || - attr->port_num == 0 || - attr->port_num > ibqp->device->phys_port_cnt) - goto inval; - - if (attr_mask & IB_QP_DEST_QPN) - if (attr->dest_qp_num > HFI1_QPN_MASK) - goto inval; - - if (attr_mask & IB_QP_RETRY_CNT) - if (attr->retry_cnt > 7) - goto inval; - - if (attr_mask & IB_QP_RNR_RETRY) - if (attr->rnr_retry > 7) - goto inval; - - /* - * Don't allow invalid path_mtu values. OK to set greater - * than the active mtu (or even the max_cap, if we have tuned - * that to a small mtu. We'll set qp->path_mtu - * to the lesser of requested attribute mtu and active, - * for packetizing messages. - * Note that the QP port has to be set in INIT and MTU in RTR. 
- */ - if (attr_mask & IB_QP_PATH_MTU) { - int mtu, pidx = qp->port_num - 1; - - dd = dd_from_dev(dev); - mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu); - if (mtu == -1) - goto inval; - - if (mtu > dd->pport[pidx].ibmtu) - pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048); - else - pmtu = attr->path_mtu; + return -EINVAL; } - if (attr_mask & IB_QP_PATH_MIG_STATE) { - if (attr->path_mig_state == IB_MIG_REARM) { - if (qp->s_mig_state == IB_MIG_ARMED) - goto inval; - if (new_state != IB_QPS_RTS) - goto inval; - } else if (attr->path_mig_state == IB_MIG_MIGRATED) { - if (qp->s_mig_state == IB_MIG_REARM) - goto inval; - if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) - goto inval; - if (qp->s_mig_state == IB_MIG_ARMED) - mig = 1; - } else - goto inval; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC) - goto inval; - - switch (new_state) { - case IB_QPS_RESET: - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - flush_iowait(qp); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - /* Stop the sending work queue and retry timer */ - cancel_work_sync(&priv->s_iowait.iowork); - del_timer_sync(&qp->s_timer); - iowait_sdma_drain(&priv->s_iowait); - flush_tx_list(qp); - remove_qp(dev, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_lock); - clear_mr_refs(qp, 1); - clear_ahg(qp); - rvt_reset_qp(&dev->rdi, qp, ibqp->qp_type); - } - break; - - case IB_QPS_RTR: - /* Allow event to re-trigger if QP set to RTR more than once */ - qp->r_flags &= ~RVT_R_COMM_EST; - qp->state = new_state; - break; - - case IB_QPS_SQD: - qp->s_draining = qp->s_last != qp->s_cur; - qp->state = new_state; - break; - - case IB_QPS_SQE: - if (qp->ibqp.qp_type == IB_QPT_RC) - goto inval; - qp->state = new_state; - break; - - case IB_QPS_ERR: - lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR); - break; - - default: - qp->state = new_state; - break; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - qp->s_pkey_index = attr->pkey_index; - - if (attr_mask & IB_QP_PORT) - qp->port_num = attr->port_num; - - if (attr_mask & IB_QP_DEST_QPN) - qp->remote_qpn = attr->dest_qp_num; - - if (attr_mask & IB_QP_SQ_PSN) { - qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK; - qp->s_psn = qp->s_next_psn; - qp->s_sending_psn = qp->s_next_psn; - qp->s_last_psn = qp->s_next_psn - 1; - qp->s_sending_hpsn = qp->s_last_psn; - } - - if (attr_mask & IB_QP_RQ_PSN) - qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK; + return 0; +} - if (attr_mask & IB_QP_ACCESS_FLAGS) - qp->qp_access_flags = attr->qp_access_flags; +void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct ib_qp *ibqp = &qp->ibqp; + struct hfi1_qp_priv *priv = qp->priv; if (attr_mask & IB_QP_AV) { - qp->remote_ah_attr = attr->ah_attr; - qp->s_srate = attr->ah_attr.static_rate; - qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); } - if (attr_mask & IB_QP_ALT_PATH) { - qp->alt_ah_attr = attr->alt_ah_attr; - qp->s_alt_pkey_index = attr->alt_pkey_index; - } - - if (attr_mask & IB_QP_PATH_MIG_STATE) { - qp->s_mig_state = attr->path_mig_state; - if (mig) { - qp->remote_ah_attr = qp->alt_ah_attr; - qp->port_num = qp->alt_ah_attr.port_num; - qp->s_pkey_index = qp->s_alt_pkey_index; - qp->s_flags |= RVT_S_AHG_CLEAR; - priv->s_sc = 
ah_to_sc(ibqp->device, &qp->remote_ah_attr); - priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); - } - } - - if (attr_mask & IB_QP_PATH_MTU) { - struct hfi1_ibport *ibp; - u8 sc, vl; - u32 mtu; - - dd = dd_from_dev(dev); - ibp = &dd->pport[qp->port_num - 1].ibport_data; - - sc = ibp->sl_to_sc[qp->remote_ah_attr.sl]; - vl = sc_to_vlt(dd, sc); - - mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu); - if (vl < PER_VL_SEND_CONTEXTS) - mtu = min_t(u32, mtu, dd->vld[vl].mtu); - pmtu = mtu_to_enum(mtu, OPA_MTU_8192); - - qp->path_mtu = pmtu; - qp->pmtu = mtu; - } - - if (attr_mask & IB_QP_RETRY_CNT) { - qp->s_retry_cnt = attr->retry_cnt; - qp->s_retry = attr->retry_cnt; - } - - if (attr_mask & IB_QP_RNR_RETRY) { - qp->s_rnr_retry_cnt = attr->rnr_retry; - qp->s_rnr_retry = attr->rnr_retry; - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - qp->r_min_rnr_timer = attr->min_rnr_timer; - - if (attr_mask & IB_QP_TIMEOUT) { - qp->timeout = attr->timeout; - qp->timeout_jiffies = - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL); - } - - if (attr_mask & IB_QP_QKEY) - qp->qkey = attr->qkey; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - qp->r_max_rd_atomic = attr->max_dest_rd_atomic; - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) - qp->s_max_rd_atomic = attr->max_rd_atomic; - - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) - insert_qp(dev, qp); - - if (lastwqe) { - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); - } - if (mig) { - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_PATH_MIG; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + if (attr_mask & IB_QP_PATH_MIG_STATE && + attr->path_mig_state == IB_MIG_MIGRATED && + qp->s_mig_state == IB_MIG_ARMED) { + qp->s_flags |= RVT_S_AHG_CLEAR; + priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); + priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); } - ret = 0; - goto bail; - -inval: - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - ret = -EINVAL; - -bail: - return ret; } int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -846,21 +359,19 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) del_timer_sync(&qp->s_timer); iowait_sdma_drain(&priv->s_iowait); flush_tx_list(qp); - remove_qp(dev, qp); + rvt_remove_qp(ib_to_rvt(ibqp->device), qp); wait_event(qp->wait, !atomic_read(&qp->refcount)); spin_lock_irq(&qp->r_lock); spin_lock(&qp->s_lock); - clear_mr_refs(qp, 1); + rvt_clear_mr_refs(qp, 1); clear_ahg(qp); } spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); /* all user's cleaned up, mark it available */ - free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; - spin_unlock(&dev->n_qps_lock); + rvt_free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); + rvt_dec_qp_cnt(&dev->rdi); if (qp->ip) kref_put(&qp->ip->ref, rvt_release_mmap_info); @@ -1216,6 +727,26 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) return qp_inuse; } +void flush_qp_waiters(struct rvt_qp *qp) +{ + flush_iowait(qp); +} + +void stop_send_queue(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + cancel_work_sync(&priv->s_iowait.iowork); +} + +void quiesce_qp(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + iowait_sdma_drain(&priv->s_iowait); + flush_tx_list(qp); +} + void notify_qp_reset(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -1252,3 
+783,75 @@ void hfi1_migrate_qp(struct rvt_qp *qp) ev.event = IB_EVENT_PATH_MIG; qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); } + +int mtu_to_path_mtu(u32 mtu) +{ + return mtu_to_enum(mtu, OPA_MTU_8192); +} + +u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) +{ + u32 mtu; + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + struct hfi1_ibport *ibp; + u8 sc, vl; + + ibp = &dd->pport[qp->port_num - 1].ibport_data; + sc = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + vl = sc_to_vlt(dd, sc); + + mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu); + if (vl < PER_VL_SEND_CONTEXTS) + mtu = min_t(u32, mtu, dd->vld[vl].mtu); + return mtu; +} + +int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr) +{ + int mtu, pidx = qp->port_num - 1; + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu); + if (mtu == -1) + return -1; /* values less than 0 are error */ + + if (mtu > dd->pport[pidx].ibmtu) + return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048); + else + return attr->path_mtu; +} + +void notify_error_qp(struct rvt_qp *qp) +{ + struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); + struct hfi1_qp_priv *priv = qp->priv; + + write_seqlock(&dev->iowait_lock); + if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { + qp->s_flags &= ~RVT_S_ANY_WAIT_IO; + list_del_init(&priv->s_iowait.list); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + write_sequnlock(&dev->iowait_lock); + + if (!(qp->s_flags & RVT_S_BUSY)) { + qp->s_hdrwords = 0; + if (qp->s_rdma_mr) { + rvt_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + flush_tx_list(qp); + } +} + diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index b825cb3..d6bfb98 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -57,38 +57,6 @@ extern unsigned int hfi1_qp_table_size; -static inline u32 qpn_hash(struct rvt_qp_ibdev *dev, u32 qpn) -{ - return hash_32(qpn, dev->qp_table_bits); -} - -/** - * hfi1_lookup_qpn - return the QP with the given QPN - * @ibp: the ibport - * @qpn: the QP number to look up - * - * The caller must hold the rcu_read_lock(), and keep the lock until - * the returned qp is no longer in use. - */ -static inline struct rvt_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, - u32 qpn) __must_hold(RCU) -{ - struct rvt_qp *qp = NULL; - - if (unlikely(qpn <= 1)) { - qp = rcu_dereference(ibp->rvp.qp[qpn]); - } else { - struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; - u32 n = qpn_hash(dev->rdi.qp_dev, qpn); - - for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; - qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) - break; - } - return qp; -} - /* * free_ahg - clear ahg from QP */ @@ -103,30 +71,6 @@ static inline void clear_ahg(struct rvt_qp *qp) qp->s_ahgidx = -1; } -/** - * hfi1_error_qp - put a QP into the error state - * @qp: the QP to put into the error state - * @err: the receive completion error to signal if a RWQE is active - * - * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. - * The QP r_lock and s_lock should be held and interrupts disabled. - * If we are already in error state, just return. 
- */ -int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err); - -/** - * hfi1_modify_qp - modify the attributes of a queue pair - * @ibqp: the queue pair who's attributes we're modifying - * @attr: the new attributes - * @attr_mask: the mask of attributes to modify - * @udata: user data for libibverbs.so - * - * Returns 0 on success, otherwise returns an errno. - */ -int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata); - int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); @@ -253,5 +197,12 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); unsigned free_all_qps(struct rvt_dev_info *rdi); void notify_qp_reset(struct rvt_qp *qp); - +int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); +void flush_qp_waiters(struct rvt_qp *qp); +void notify_error_qp(struct rvt_qp *qp); +void stop_send_queue(struct rvt_qp *qp); +void quiesce_qp(struct rvt_qp *qp); +u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); +int mtu_to_path_mtu(u32 mtu); #endif /* _QP_H */ diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index a30bf30..50559fd 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -49,6 +49,8 @@ */ #include +#include +#include #include "hfi.h" #include "qp.h" @@ -891,7 +893,7 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) qp->s_retry = qp->s_retry_cnt; } else if (qp->s_last == qp->s_acked) { hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); - hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); return; } else /* need to handle delayed completion */ return; @@ -1355,7 +1357,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, class_b: if (qp->s_last == qp->s_acked) { hfi1_send_complete(qp, wqe, status); - hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); } break; @@ -1601,7 +1603,7 @@ ack_len_err: ack_err: if (qp->s_last == qp->s_acked) { hfi1_send_complete(qp, wqe, status); - hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); } ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1832,7 +1834,7 @@ void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err) int lastwqe; spin_lock_irqsave(&qp->s_lock, flags); - lastwqe = hfi1_error_qp(qp, err); + lastwqe = rvt_error_qp(qp, err); spin_unlock_irqrestore(&qp->s_lock, flags); if (lastwqe) { @@ -1873,8 +1875,8 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, cc_event = &ppd->cc_events[ppd->cc_log_idx++]; if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS) ppd->cc_log_idx = 0; - cc_event->lqpn = lqpn & HFI1_QPN_MASK; - cc_event->rqpn = rqpn & HFI1_QPN_MASK; + cc_event->lqpn = lqpn & RVT_QPN_MASK; + cc_event->rqpn = rqpn & RVT_QPN_MASK; cc_event->sl = sl; cc_event->svc_type = svc_type; cc_event->rlid = rlid; @@ -2063,7 +2065,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) /* OK, process the packet. 
*/ switch (opcode) { case OP(SEND_FIRST): - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto nack_op_err; if (!ret) @@ -2084,7 +2086,7 @@ send_middle: case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): /* consume RWQE */ - ret = hfi1_get_rwqe(qp, 1); + ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; if (!ret) @@ -2093,7 +2095,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto nack_op_err; if (!ret) @@ -2125,7 +2127,7 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; hfi1_copy_sge(&qp->r_sge, data, tlen, 1); - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); qp->r_msn++; if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; @@ -2193,7 +2195,7 @@ send_last: goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) goto no_immediate_data; - ret = hfi1_get_rwqe(qp, 1); + ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; if (!ret) diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index b47e462..6379df5 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -145,7 +145,7 @@ bail: } /** - * hfi1_get_rwqe - copy the next RWQE into the QP's RWQE + * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP * @wr_id_only: update qp->r_wr_id only, not qp->r_sge * @@ -154,7 +154,7 @@ bail: * * Can be called from interrupt level. */ -int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only) +int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only) { unsigned long flags; struct rvt_rq *rq; @@ -192,7 +192,7 @@ int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only) } /* Make sure entry is read after head index is read. */ smp_rmb(); - wqe = get_rwqe_ptr(rq, tail); + wqe = rvt_get_rwqe_ptr(rq, tail); /* * Even though we update the tail index in memory, the verbs * consumer is not supposed to post more entries until a @@ -377,7 +377,8 @@ static void ruc_loopback(struct rvt_qp *sqp) * Note that we check the responder QP state after * checking the requester's state. 
*/ - qp = hfi1_lookup_qpn(ibp, sqp->remote_qpn); + qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp, + sqp->remote_qpn); spin_lock_irqsave(&sqp->s_lock, flags); @@ -441,7 +442,7 @@ again: wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto op_err; if (!ret) @@ -453,7 +454,7 @@ again: goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; - ret = hfi1_get_rwqe(qp, 1); + ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto op_err; if (!ret) @@ -548,7 +549,7 @@ again: sqp->s_len -= len; } if (release) - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto send_comp; @@ -623,7 +624,7 @@ serr: spin_lock_irqsave(&sqp->s_lock, flags); hfi1_send_complete(sqp, wqe, send_status); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = hfi1_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/staging/rdma/hfi1/srq.c index c53b378..f71dff0 100644 --- a/drivers/staging/rdma/hfi1/srq.c +++ b/drivers/staging/rdma/hfi1/srq.c @@ -93,7 +93,7 @@ int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, goto bail; } - wqe = get_rwqe_ptr(&srq->rq, wq->head); + wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) @@ -299,7 +299,7 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct rvt_rwqe *wqe; int i; - wqe = get_rwqe_ptr(&srq->rq, tail); + wqe = rvt_get_rwqe_ptr(&srq->rq, tail); p->wr_id = wqe->wr_id; p->num_sge = wqe->num_sge; for (i = 0; i < wqe->num_sge; i++) diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c index 10122e8..9eadec5 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/staging/rdma/hfi1/trace.c @@ -166,7 +166,7 @@ const char *parse_everbs_hdrs( case OP(UD, SEND_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, DETH_PRN, be32_to_cpu(eh->ud.deth[0]), - be32_to_cpu(eh->ud.deth[1]) & HFI1_QPN_MASK); + be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); break; } trace_seq_putc(p, 0); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index 14601d7..fcae96e 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -364,37 +364,6 @@ DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, TP_ARGS(qp, flags)); #undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_qphash -DECLARE_EVENT_CLASS(hfi1_qphash_template, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, bucket) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->bucket = bucket; - ), - TP_printk( - "[%s] qpn 0x%x bucket %u", - __get_str(dev), - __entry->qpn, - __entry->bucket - ) -); - -DEFINE_EVENT(hfi1_qphash_template, hfi1_qpinsert, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -DEFINE_EVENT(hfi1_qphash_template, hfi1_qpremove, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -#undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); @@ -538,7 +507,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, (be32_to_cpu(ohdr->bth[1]) >> 
HFI1_BECN_SHIFT) & HFI1_BECN_MASK; __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; + be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; __entry->a = (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; /* allow for larger PSN */ @@ -627,7 +596,7 @@ TRACE_EVENT(snoop_capture, DD_DEV_ASSIGN(dd); __entry->slid = be16_to_cpu(hdr->lrh[3]); __entry->dlid = be16_to_cpu(hdr->lrh[1]); - __entry->qpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; + __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index ec404ff..1e50d30 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -292,7 +292,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) u16 rlid = be16_to_cpu(hdr->lrh[3]); u8 sl, sc5; - lqpn = bth1 & HFI1_QPN_MASK; + lqpn = bth1 & RVT_QPN_MASK; rqpn = qp->remote_qpn; sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; @@ -335,7 +335,7 @@ inv: set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -394,7 +394,7 @@ send_first: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) qp->r_sge = qp->s_rdma_read_sge; else { - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto op_err; if (!ret) @@ -444,7 +444,7 @@ send_last: goto rewind; wc.opcode = IB_WC_RECV; hfi1_copy_sge(&qp->r_sge, data, tlen, 0); - hfi1_put_ss(&qp->s_rdma_read_sge); + rvt_put_ss(&qp->s_rdma_read_sge); last_imm: wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -537,9 +537,9 @@ rdma_last_imm: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - hfi1_put_ss(&qp->s_rdma_read_sge); + rvt_put_ss(&qp->s_rdma_read_sge); else { - ret = hfi1_get_rwqe(qp, 1); + ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto op_err; if (!ret) @@ -548,7 +548,7 @@ rdma_last_imm: wc.byte_len = qp->r_len; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; hfi1_copy_sge(&qp->r_sge, data, tlen, 1); - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); goto last_imm; case OP(RDMA_WRITE_LAST): @@ -564,7 +564,7 @@ rdma_last: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; hfi1_copy_sge(&qp->r_sge, data, tlen, 1); - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); break; default: diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index e2cbdc8..2eae167 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -80,7 +80,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); + qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp, + swqe->ud_wr.remote_qpn); if (!qp) { ibp->rvp.n_pkt_drops++; rcu_read_unlock(); @@ -166,7 +167,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) else { int ret; - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) { hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR); goto bail_unlock; @@ -222,7 +223,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } length -= len; } - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = qp->r_wr_id; @@ -664,7 +665,7 @@ void 
hfi1_ud_rcv(struct hfi1_packet *packet) struct ib_grh *grh = NULL; qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK; + src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; dlid = be16_to_cpu(hdr->lrh[1]); is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); @@ -675,7 +676,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * error path. */ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 lqpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; + u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; u8 sl, sc5; sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; @@ -817,7 +818,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) else { int ret; - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) { hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; @@ -840,7 +841,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } else hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 2fed284..e51f827 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -368,7 +368,7 @@ static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, goto bail; } - wqe = get_rwqe_ptr(&qp->r_rq, wq->head); + wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) @@ -418,6 +418,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; + struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; unsigned long flags; u32 qp_num; int lnh; @@ -447,7 +448,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ - qp_num = be32_to_cpu(packet->ohdr->bth[1]) & HFI1_QPN_MASK; + qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; lid = be16_to_cpu(hdr->lrh[1]); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { @@ -474,7 +475,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) wake_up(&mcast->wait); } else { rcu_read_lock(); - packet->qp = hfi1_lookup_qpn(ibp, qp_num); + packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) { rcu_read_unlock(); goto drop; @@ -1534,7 +1535,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* Only need to initialize non-zero fields. 
*/ - spin_lock_init(&dev->n_qps_lock); spin_lock_init(&dev->n_srqs_lock); init_timer(&dev->mem_timer); dev->mem_timer.function = mem_timer; @@ -1623,7 +1623,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->query_srq = hfi1_query_srq; ibdev->destroy_srq = hfi1_destroy_srq; ibdev->create_qp = NULL; - ibdev->modify_qp = hfi1_modify_qp; + ibdev->modify_qp = NULL; ibdev->query_qp = hfi1_query_qp; ibdev->destroy_qp = hfi1_destroy_qp; ibdev->post_send = NULL; @@ -1674,12 +1674,26 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; dd->verbs_dev.rdi.dparms.qpn_res_end = dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; + dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; + dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; + dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; + dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; + dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; + dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; + dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; + dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue; + dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp; + dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; + dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp; + dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; + dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; + dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; /* completeion queue */ snprintf(dd->verbs_dev.rdi.dparms.cq_name, diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 8e82cf0..f2c8a21 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -225,18 +225,6 @@ struct hfi1_pkt_state { #define HFI1_PSN_CREDIT 16 -/* - * Since struct rvt_rwqe is not a fixed size, we can't simply index into - * struct rvt_rwq.wq. This function does the array index computation. 
- */ -static inline struct rvt_rwqe *get_rwqe_ptr(struct rvt_rq *rq, unsigned n) -{ - return (struct rvt_rwqe *) - ((char *) rq->wq->wq + - (sizeof(struct rvt_rwqe) + - rq->max_sge * sizeof(struct ib_sge)) * n); -} - struct hfi1_opcode_stats { u64 n_packets; /* number of packets */ u64 n_bytes; /* total number of bytes */ @@ -286,8 +274,6 @@ struct hfi1_ibdev { u64 n_kmem_wait; u64 n_send_schedule; - u32 n_qps_allocated; /* number of QPs allocated for device */ - spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ spinlock_t n_srqs_lock; #ifdef CONFIG_DEBUG_FS @@ -464,19 +450,16 @@ int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int hfi1_destroy_srq(struct ib_srq *ibsrq); -static inline void hfi1_put_ss(struct rvt_sge_state *ss) -{ - while (ss->num_sge) { - rvt_put_mr(ss->sge.mr); - if (--ss->num_sge) - ss->sge = *ss->sg_list++; - } -} - -int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only); +int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only); void hfi1_migrate_qp(struct rvt_qp *qp); +int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + +void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); -- cgit v0.10.2 From 75261cc6ab663e0d44f6f5a02a46d3e197cbe639 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:50 -0800 Subject: staging/rdma/hfi1: Remove destroy qp verb This removes the destroy qp verbs in favor of using rdmavt. Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 1e6ca4f..c9f2467 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -332,59 +332,6 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) } /** - * hfi1_destroy_qp - destroy a queue pair - * @ibqp: the queue pair to destroy - * - * Returns 0 on success. - * - * Note that this can be called while the QP is actively sending or - * receiving! - */ -int hfi1_destroy_qp(struct ib_qp *ibqp) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct hfi1_ibdev *dev = to_idev(ibqp->device); - struct hfi1_qp_priv *priv = qp->priv; - - /* Make sure HW and driver activity is stopped. 
*/ - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_lock); - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - flush_iowait(qp); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - cancel_work_sync(&priv->s_iowait.iowork); - del_timer_sync(&qp->s_timer); - iowait_sdma_drain(&priv->s_iowait); - flush_tx_list(qp); - rvt_remove_qp(ib_to_rvt(ibqp->device), qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_lock); - rvt_clear_mr_refs(qp, 1); - clear_ahg(qp); - } - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - - /* all user's cleaned up, mark it available */ - rvt_free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); - rvt_dec_qp_cnt(&dev->rdi); - - if (qp->ip) - kref_put(&qp->ip->ref, rvt_release_mmap_info); - else - vfree(qp->r_rq.wq); - vfree(qp->s_wq); - kfree(priv->s_hdr); - kfree(priv); - kfree(qp); - return 0; -} - -/** * hfi1_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush * @aeth: the Acknowledge Extended Transport Header diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index d6bfb98..21af3ad 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -96,17 +96,6 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); /** - * hfi1_destroy_qp - destroy a queue pair - * @ibqp: the queue pair to destroy - * - * Returns 0 on success. - * - * Note that this can be called while the QP is actively sending or - * receiving! - */ -int hfi1_destroy_qp(struct ib_qp *ibqp); - -/** * hfi1_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush * @aeth: the Acknowledge Extended Transport Header diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index e51f827..1ed1f20 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1625,7 +1625,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->create_qp = NULL; ibdev->modify_qp = NULL; ibdev->query_qp = hfi1_query_qp; - ibdev->destroy_qp = hfi1_destroy_qp; + ibdev->destroy_qp = NULL; ibdev->post_send = NULL; ibdev->post_recv = post_receive; ibdev->post_srq_recv = hfi1_post_srq_receive; -- cgit v0.10.2 From 1897ce219143cae13a87e0544b3b467ad3932964 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:43:55 -0800 Subject: staging/rdma/hfi1: Remove post_recv and use rdmavt version This patch removes the simple post recv function in favor of using rdmavt. The packet receive processing still lives in the driver though. Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 1ed1f20..b72eb7b 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -323,67 +323,6 @@ void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release) } } -/** - * post_receive - post a receive on a QP - * @ibqp: the QP to post the receive on - * @wr: the WR to post - * @bad_wr: the first bad WR is put here - * - * This may be called from interrupt context. 
- */ -static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct rvt_rwq *wq = qp->r_rq.wq; - unsigned long flags; - int ret; - - /* Check that state is OK to post receive. */ - if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - for (; wr; wr = wr->next) { - struct rvt_rwqe *wqe; - u32 next; - int i; - - if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&qp->r_rq.lock, flags); - next = wq->head + 1; - if (next >= qp->r_rq.size) - next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - *bad_wr = wr; - ret = -ENOMEM; - goto bail; - } - - wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - } - ret = 0; - -bail: - return ret; -} - /* * Make sure the QP is ready and able to accept the given opcode. */ @@ -1627,7 +1566,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->query_qp = hfi1_query_qp; ibdev->destroy_qp = NULL; ibdev->post_send = NULL; - ibdev->post_recv = post_receive; + ibdev->post_recv = NULL; ibdev->post_srq_recv = hfi1_post_srq_receive; ibdev->create_cq = NULL; ibdev->destroy_cq = NULL; -- cgit v0.10.2 From 4331629f57c4def899e560a7e3cb87fda577fb4b Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:44:01 -0800 Subject: staging/rdma/hfi1: Clean up register device Now that rdmavt has solidified in its design we can clean up the driver specific register device functions. This handles hfi1. 
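A minimal sketch (editor's illustration, not part of this patch) of the registration shape this cleanup leaves behind: the driver keeps only the callbacks it still implements and lets rvt_register_device() supply defaults for everything left NULL. The helper name example_register() is invented for illustration; the fields and callbacks it references are the ones appearing elsewhere in this series.

static int example_register(struct hfi1_devdata *dd)
{
	struct ib_device *ibdev = &dd->verbs_dev.rdi.ibdev;

	/* verbs the driver still owns */
	ibdev->modify_device = modify_device;
	ibdev->process_mad = hfi1_process_mad;

	/* per-driver hooks consumed by rdmavt */
	dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
	dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;

	/* rdmavt registers the ib_device and fills in the common verbs */
	return rvt_register_device(&dd->verbs_dev.rdi);
}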
Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index b72eb7b..6799915 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1507,86 +1507,21 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz); ibdev->owner = THIS_MODULE; ibdev->node_guid = cpu_to_be64(ppd->guid); - ibdev->uverbs_abi_ver = HFI1_UVERBS_ABI_VERSION; - ibdev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - ibdev->node_type = RDMA_NODE_IB_CA; ibdev->phys_port_cnt = dd->num_pports; - ibdev->num_comp_vectors = 1; ibdev->dma_device = &dd->pcidev->dev; - ibdev->query_device = NULL; ibdev->modify_device = modify_device; ibdev->query_port = query_port; ibdev->modify_port = modify_port; - ibdev->query_pkey = NULL; ibdev->query_gid = query_gid; - ibdev->alloc_ucontext = NULL; - ibdev->dealloc_ucontext = NULL; - ibdev->alloc_pd = NULL; - ibdev->dealloc_pd = NULL; - ibdev->create_ah = NULL; - ibdev->destroy_ah = NULL; - ibdev->modify_ah = NULL; - ibdev->query_ah = NULL; ibdev->create_srq = hfi1_create_srq; ibdev->modify_srq = hfi1_modify_srq; ibdev->query_srq = hfi1_query_srq; ibdev->destroy_srq = hfi1_destroy_srq; - ibdev->create_qp = NULL; - ibdev->modify_qp = NULL; ibdev->query_qp = hfi1_query_qp; - ibdev->destroy_qp = NULL; - ibdev->post_send = NULL; - ibdev->post_recv = NULL; ibdev->post_srq_recv = hfi1_post_srq_receive; - ibdev->create_cq = NULL; - ibdev->destroy_cq = NULL; - ibdev->resize_cq = NULL; - ibdev->poll_cq = NULL; - ibdev->req_notify_cq = NULL; - ibdev->get_dma_mr = NULL; - ibdev->reg_user_mr = NULL; - ibdev->dereg_mr = NULL; - ibdev->alloc_mr = NULL; - ibdev->map_mr_sg = NULL; - ibdev->alloc_fmr = NULL; - ibdev->map_phys_fmr = NULL; - ibdev->unmap_fmr = NULL; - ibdev->dealloc_fmr = NULL; - ibdev->attach_mcast = NULL; - ibdev->detach_mcast = NULL; + + /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; - ibdev->mmap = NULL; - ibdev->dma_ops = NULL; ibdev->get_port_immutable = port_immutable; strncpy(ibdev->node_desc, init_utsname()->nodename, -- cgit v0.10.2 From 7af6d00654a16ca805f50e05eebb545ef9dbb016 Mon Sep 17 00:00:00 2001 From: 
Dennis Dalessandro Date: Tue, 19 Jan 2016 14:44:06 -0800 Subject: staging/rdma/hfi1: Use rdmavt device allocation function No longer do drivers need to call into the IB core to allocate the verbs device. Use the functionality provided by rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index d1cb2c8..7def3f3 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -998,13 +998,16 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) { unsigned long flags; struct hfi1_devdata *dd; - int ret; + int ret, nports; + + /* extra is * number of ports */ + nports = extra / sizeof(struct hfi1_pportdata); - dd = (struct hfi1_devdata *)ib_alloc_device(sizeof(*dd) + extra); + dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra, + nports); if (!dd) return ERR_PTR(-ENOMEM); - /* extra is * number of ports */ - dd->num_pports = extra / sizeof(struct hfi1_pportdata); + dd->num_pports = nports; dd->pport = (struct hfi1_pportdata *)(dd + 1); INIT_LIST_HEAD(&dd->list); -- cgit v0.10.2 From 9c4a311e6ca03db4e16c4c06bb37a1189ba0bc03 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 19 Jan 2016 14:44:11 -0800 Subject: staging/rdma/hfi1: Remove create and free mad agents Get rid of create and free mad agent from the driver and use rdmavt version. Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 6daf277..9cadf77 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -129,7 +129,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) memcpy(smp->route.lid.data, data, len); spin_lock_irqsave(&ibp->rvp.lock, flags); - if (!ibp->sm_ah) { + if (!ibp->rvp.sm_ah) { if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; @@ -138,13 +138,13 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) ret = PTR_ERR(ah); else { send_buf->ah = ah; - ibp->sm_ah = ibah_to_rvtah(ah); + ibp->rvp.sm_ah = ibah_to_rvtah(ah); ret = 0; } } else ret = -EINVAL; } else { - send_buf->ah = &ibp->sm_ah->ibah; + send_buf->ah = &ibp->rvp.sm_ah->ibah; ret = 0; } spin_unlock_irqrestore(&ibp->rvp.lock, flags); @@ -1138,11 +1138,11 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) { pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid); spin_lock_irqsave(&ibp->rvp.lock, flags); - if (ibp->sm_ah) { + if (ibp->rvp.sm_ah) { if (smlid != ibp->rvp.sm_lid) - ibp->sm_ah->attr.dlid = smlid; + ibp->rvp.sm_ah->attr.dlid = smlid; if (msl != ibp->rvp.sm_sl) - ibp->sm_ah->attr.sl = msl; + ibp->rvp.sm_ah->attr.sl = msl; } spin_unlock_irqrestore(&ibp->rvp.lock, flags); if (smlid != ibp->rvp.sm_lid) @@ -4157,67 +4157,3 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, return IB_MAD_RESULT_FAILURE; } - -static void send_handler(struct ib_mad_agent *agent, - struct ib_mad_send_wc *mad_send_wc) -{ - ib_free_send_mad(mad_send_wc->send_buf); -} - -int hfi1_create_agents(struct hfi1_ibdev *dev) -{ - struct hfi1_devdata *dd = dd_from_dev(dev); - struct ib_mad_agent *agent; - struct hfi1_ibport *ibp; - int p; - int ret; - - for (p = 0; p < dd->num_pports; p++) { - ibp = &dd->pport[p].ibport_data; - agent = ib_register_mad_agent(&dev->rdi.ibdev, p + 1, - 
IB_QPT_SMI, - NULL, 0, send_handler, - NULL, NULL, 0); - if (IS_ERR(agent)) { - ret = PTR_ERR(agent); - goto err; - } - - ibp->rvp.send_agent = agent; - } - - return 0; - -err: - for (p = 0; p < dd->num_pports; p++) { - ibp = &dd->pport[p].ibport_data; - if (ibp->rvp.send_agent) { - agent = ibp->rvp.send_agent; - ibp->rvp.send_agent = NULL; - ib_unregister_mad_agent(agent); - } - } - - return ret; -} - -void hfi1_free_agents(struct hfi1_ibdev *dev) -{ - struct hfi1_devdata *dd = dd_from_dev(dev); - struct ib_mad_agent *agent; - struct hfi1_ibport *ibp; - int p; - - for (p = 0; p < dd->num_pports; p++) { - ibp = &dd->pport[p].ibport_data; - if (ibp->rvp.send_agent) { - agent = ibp->rvp.send_agent; - ibp->rvp.send_agent = NULL; - ib_unregister_mad_agent(agent); - } - if (ibp->sm_ah) { - ib_destroy_ah(&ibp->sm_ah->ibah); - ibp->sm_ah = NULL; - } - } -} diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 6799915..68f4045 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1590,27 +1590,19 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) - goto err_reg; - - ret = hfi1_create_agents(dev); - if (ret) - goto err_agents; + goto err_verbs_txreq; ret = hfi1_verbs_register_sysfs(dd); if (ret) goto err_class; - goto bail; + return ret; err_class: - hfi1_free_agents(dev); -err_agents: rvt_unregister_device(&dd->verbs_dev.rdi); -err_reg: err_verbs_txreq: kmem_cache_destroy(dev->verbs_txreq_cache); dd_dev_err(dd, "cannot register verbs: %d!\n", -ret); -bail: return ret; } @@ -1620,8 +1612,6 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) hfi1_verbs_unregister_sysfs(dd); - hfi1_free_agents(dev); - rvt_unregister_device(&dd->verbs_dev.rdi); if (!list_empty(&dev->txwait)) diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index f2c8a21..c845514 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -247,8 +247,6 @@ static inline void inc_opstats( struct hfi1_ibport { struct rvt_qp __rcu *qp[2]; struct rvt_ibport rvp; - struct rvt_ah *sm_ah; - struct rvt_ah *smi_ah; __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */ @@ -340,8 +338,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad_hdr *in_mad, size_t in_mad_size, struct ib_mad_hdr *out_mad, size_t *out_mad_size, u16 *out_mad_pkey_index); -int hfi1_create_agents(struct hfi1_ibdev *dev); -void hfi1_free_agents(struct hfi1_ibdev *dev); /* * The PSN_MASK and PSN_SHIFT allow for -- cgit v0.10.2 From 07336db4003fd911681e37b0523529fbd04fa604 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Tue, 19 Jan 2016 14:44:17 -0800 Subject: staging/rdma/hfi1: Remove hfi1_query_qp function Rely on rvt_query_qp function defined in rdmavt Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index c9f2467..52723c2 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -220,56 +220,6 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, } } -int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_qp_init_attr *init_attr) -{ - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - - attr->qp_state = qp->state; - attr->cur_qp_state = attr->qp_state; - attr->path_mtu = qp->path_mtu; - attr->path_mig_state = qp->s_mig_state; - attr->qkey = 
qp->qkey; - attr->rq_psn = mask_psn(qp->r_psn); - attr->sq_psn = mask_psn(qp->s_next_psn); - attr->dest_qp_num = qp->remote_qpn; - attr->qp_access_flags = qp->qp_access_flags; - attr->cap.max_send_wr = qp->s_size - 1; - attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; - attr->cap.max_send_sge = qp->s_max_sge; - attr->cap.max_recv_sge = qp->r_rq.max_sge; - attr->cap.max_inline_data = 0; - attr->ah_attr = qp->remote_ah_attr; - attr->alt_ah_attr = qp->alt_ah_attr; - attr->pkey_index = qp->s_pkey_index; - attr->alt_pkey_index = qp->s_alt_pkey_index; - attr->en_sqd_async_notify = 0; - attr->sq_draining = qp->s_draining; - attr->max_rd_atomic = qp->s_max_rd_atomic; - attr->max_dest_rd_atomic = qp->r_max_rd_atomic; - attr->min_rnr_timer = qp->r_min_rnr_timer; - attr->port_num = qp->port_num; - attr->timeout = qp->timeout; - attr->retry_cnt = qp->s_retry_cnt; - attr->rnr_retry = qp->s_rnr_retry_cnt; - attr->alt_port_num = qp->alt_ah_attr.port_num; - attr->alt_timeout = qp->alt_timeout; - - init_attr->event_handler = qp->ibqp.event_handler; - init_attr->qp_context = qp->ibqp.qp_context; - init_attr->send_cq = qp->ibqp.send_cq; - init_attr->recv_cq = qp->ibqp.recv_cq; - init_attr->srq = qp->ibqp.srq; - init_attr->cap = attr->cap; - if (qp->s_flags & RVT_S_SIGNAL_REQ_WR) - init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; - else - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->qp_type = qp->ibqp.qp_type; - init_attr->port_num = qp->port_num; - return 0; -} - /** * hfi1_compute_aeth - compute the AETH (syndrome + MSN) * @qp: the queue pair to compute the AETH for diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 21af3ad..36be547 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -71,9 +71,6 @@ static inline void clear_ahg(struct rvt_qp *qp) qp->s_ahgidx = -1; } -int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_qp_init_attr *init_attr); - /** * hfi1_compute_aeth - compute the AETH (syndrome + MSN) * @qp: the queue pair to compute the AETH for diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 68f4045..f5cc0b9 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1517,7 +1517,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_srq = hfi1_modify_srq; ibdev->query_srq = hfi1_query_srq; ibdev->destroy_srq = hfi1_destroy_srq; - ibdev->query_qp = hfi1_query_qp; ibdev->post_srq_recv = hfi1_post_srq_receive; /* keep process mad in the driver */ -- cgit v0.10.2 From 9cd70e1bbf9393633904b1cb71925c40e1839d68 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 3 Feb 2016 14:30:40 -0800 Subject: staging/rdma/hfi1: Remove srq functionality srq functionality is now in rdmavt. Remove it from the hfi1 driver. 
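A brief editor's sketch before the removal below: the SRQ receive path indexes work queue entries by hand because struct rvt_rwqe is variable sized (max_sge struct ib_sge entries follow each header). This restates the rvt_get_rwqe_ptr() arithmetic already used by the code being deleted; the name example_rwqe_ptr() is assumed for illustration only.

static inline struct rvt_rwqe *example_rwqe_ptr(struct rvt_rq *rq, unsigned n)
{
	/* slot n = base + n * (header + max_sge scatter/gather entries) */
	return (struct rvt_rwqe *)
		((char *)rq->wq->wq +
		 (sizeof(struct rvt_rwqe) +
		  rq->max_sge * sizeof(struct ib_sge)) * n);
}

A caller such as the removed hfi1_post_srq_receive() picks the slot at wq->head with this computation, copies wr_id and the sg_list into it, and only then publishes the new head behind a write barrier.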
Reviewed-by: Dennis Dalessandro Reviewed-by: Harish Chegondi Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 0069796..ca2dea5 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := chip.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ init.o intr.o mad.o pcie.o pio.o pio_copy.o \ - qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \ + qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/staging/rdma/hfi1/srq.c deleted file mode 100644 index f71dff0..0000000 --- a/drivers/staging/rdma/hfi1/srq.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include -#include -#include - -#include "verbs.h" - -/** - * hfi1_post_srq_receive - post a receive on a shared receive queue - * @ibsrq: the SRQ to post the receive on - * @wr: the list of work requests to post - * @bad_wr: A pointer to the first WR to cause a problem is put here - * - * This may be called from interrupt context. 
- */ -int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct rvt_rwq *wq; - unsigned long flags; - int ret; - - for (; wr; wr = wr->next) { - struct rvt_rwqe *wqe; - u32 next; - int i; - - if ((unsigned) wr->num_sge > srq->rq.max_sge) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&srq->rq.lock, flags); - wq = srq->rq.wq; - next = wq->head + 1; - if (next >= srq->rq.size) - next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&srq->rq.lock, flags); - *bad_wr = wr; - ret = -ENOMEM; - goto bail; - } - - wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&srq->rq.lock, flags); - } - ret = 0; - -bail: - return ret; -} - -/** - * hfi1_create_srq - create a shared receive queue - * @ibpd: the protection domain of the SRQ to create - * @srq_init_attr: the attributes of the SRQ - * @udata: data from libibverbs when creating a user SRQ - */ -struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) -{ - struct hfi1_ibdev *dev = to_idev(ibpd->device); - struct rvt_srq *srq; - u32 sz; - struct ib_srq *ret; - - if (srq_init_attr->srq_type != IB_SRQT_BASIC) { - ret = ERR_PTR(-ENOSYS); - goto done; - } - - if (srq_init_attr->attr.max_sge == 0 || - srq_init_attr->attr.max_sge > hfi1_max_srq_sges || - srq_init_attr->attr.max_wr == 0 || - srq_init_attr->attr.max_wr > hfi1_max_srq_wrs) { - ret = ERR_PTR(-EINVAL); - goto done; - } - - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - - /* - * Need to use vmalloc() if we want to support large #s of entries. - */ - srq->rq.size = srq_init_attr->attr.max_wr + 1; - srq->rq.max_sge = srq_init_attr->attr.max_sge; - sz = sizeof(struct ib_sge) * srq->rq.max_sge + - sizeof(struct rvt_rwqe); - srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz); - if (!srq->rq.wq) { - ret = ERR_PTR(-ENOMEM); - goto bail_srq; - } - - /* - * Return the address of the RWQ as the offset to mmap. - * See hfi1_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - int err; - u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; - - srq->ip = - rvt_create_mmap_info(&dev->rdi, s, ibpd->uobject->context, - srq->rq.wq); - if (!srq->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wq; - } - - err = ib_copy_to_udata(udata, &srq->ip->offset, - sizeof(srq->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else - srq->ip = NULL; - - /* - * ib_create_srq() will initialize srq->ibsrq. 
- */ - spin_lock_init(&srq->rq.lock); - srq->rq.wq->head = 0; - srq->rq.wq->tail = 0; - srq->limit = srq_init_attr->attr.srq_limit; - - spin_lock(&dev->n_srqs_lock); - if (dev->n_srqs_allocated == hfi1_max_srqs) { - spin_unlock(&dev->n_srqs_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_srqs_allocated++; - spin_unlock(&dev->n_srqs_lock); - - if (srq->ip) { - spin_lock_irq(&dev->rdi.pending_lock); - list_add(&srq->ip->pending_mmaps, &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - - ret = &srq->ibsrq; - goto done; - -bail_ip: - kfree(srq->ip); -bail_wq: - vfree(srq->rq.wq); -bail_srq: - kfree(srq); -done: - return ret; -} - -/** - * hfi1_modify_srq - modify a shared receive queue - * @ibsrq: the SRQ to modify - * @attr: the new attributes of the SRQ - * @attr_mask: indicates which attributes to modify - * @udata: user data for libibverbs.so - */ -int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, - struct ib_udata *udata) -{ - struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct rvt_rwq *wq; - int ret = 0; - - if (attr_mask & IB_SRQ_MAX_WR) { - struct rvt_rwq *owq; - struct rvt_rwqe *p; - u32 sz, size, n, head, tail; - - /* Check that the requested sizes are below the limits. */ - if ((attr->max_wr > hfi1_max_srq_wrs) || - ((attr_mask & IB_SRQ_LIMIT) ? - attr->srq_limit : srq->limit) > attr->max_wr) { - ret = -EINVAL; - goto bail; - } - - sz = sizeof(struct rvt_rwqe) + - srq->rq.max_sge * sizeof(struct ib_sge); - size = attr->max_wr + 1; - wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz); - if (!wq) { - ret = -ENOMEM; - goto bail; - } - - /* Check that we can write the offset to mmap. */ - if (udata && udata->inlen >= sizeof(__u64)) { - __u64 offset_addr; - __u64 offset = 0; - - ret = ib_copy_from_udata(&offset_addr, udata, - sizeof(offset_addr)); - if (ret) - goto bail_free; - udata->outbuf = - (void __user *) (unsigned long) offset_addr; - ret = ib_copy_to_udata(udata, &offset, - sizeof(offset)); - if (ret) - goto bail_free; - } - - spin_lock_irq(&srq->rq.lock); - /* - * validate head and tail pointer values and compute - * the number of remaining WQEs. - */ - owq = srq->rq.wq; - head = owq->head; - tail = owq->tail; - if (head >= srq->rq.size || tail >= srq->rq.size) { - ret = -EINVAL; - goto bail_unlock; - } - n = head; - if (n < tail) - n += srq->rq.size - tail; - else - n -= tail; - if (size <= n) { - ret = -EINVAL; - goto bail_unlock; - } - n = 0; - p = wq->wq; - while (tail != head) { - struct rvt_rwqe *wqe; - int i; - - wqe = rvt_get_rwqe_ptr(&srq->rq, tail); - p->wr_id = wqe->wr_id; - p->num_sge = wqe->num_sge; - for (i = 0; i < wqe->num_sge; i++) - p->sg_list[i] = wqe->sg_list[i]; - n++; - p = (struct rvt_rwqe *)((char *)p + sz); - if (++tail >= srq->rq.size) - tail = 0; - } - srq->rq.wq = wq; - srq->rq.size = size; - wq->head = n; - wq->tail = 0; - if (attr_mask & IB_SRQ_LIMIT) - srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); - - vfree(owq); - - if (srq->ip) { - struct rvt_mmap_info *ip = srq->ip; - struct hfi1_ibdev *dev = to_idev(srq->ibsrq.device); - u32 s = sizeof(struct rvt_rwq) + size * sz; - - rvt_update_mmap_info(&dev->rdi, ip, s, wq); - - /* - * Return the offset to mmap. - * See hfi1_mmap() for details. - */ - if (udata && udata->inlen >= sizeof(__u64)) { - ret = ib_copy_to_udata(udata, &ip->offset, - sizeof(ip->offset)); - if (ret) - goto bail; - } - - /* - * Put user mapping info onto the pending list - * unless it already is on the list. 
- */ - spin_lock_irq(&dev->rdi.pending_lock); - if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, - &dev->rdi.pending_mmaps); - spin_unlock_irq(&dev->rdi.pending_lock); - } - } else if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irq(&srq->rq.lock); - if (attr->srq_limit >= srq->rq.size) - ret = -EINVAL; - else - srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); - } - goto bail; - -bail_unlock: - spin_unlock_irq(&srq->rq.lock); -bail_free: - vfree(wq); -bail: - return ret; -} - -int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) -{ - struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - - attr->max_wr = srq->rq.size - 1; - attr->max_sge = srq->rq.max_sge; - attr->srq_limit = srq->limit; - return 0; -} - -/** - * hfi1_destroy_srq - destroy a shared receive queue - * @ibsrq: the SRQ to destroy - */ -int hfi1_destroy_srq(struct ib_srq *ibsrq) -{ - struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct hfi1_ibdev *dev = to_idev(ibsrq->device); - - spin_lock(&dev->n_srqs_lock); - dev->n_srqs_allocated--; - spin_unlock(&dev->n_srqs_lock); - if (srq->ip) - kref_put(&srq->ip->ref, rvt_release_mmap_info); - else - vfree(srq->rq.wq); - kfree(srq); - - return 0; -} diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index f5cc0b9..a53d93a 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1474,7 +1474,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* Only need to initialize non-zero fields. */ - spin_lock_init(&dev->n_srqs_lock); init_timer(&dev->mem_timer); dev->mem_timer.function = mem_timer; dev->mem_timer.data = (unsigned long) dev; @@ -1513,11 +1512,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->query_port = query_port; ibdev->modify_port = modify_port; ibdev->query_gid = query_gid; - ibdev->create_srq = hfi1_create_srq; - ibdev->modify_srq = hfi1_modify_srq; - ibdev->query_srq = hfi1_query_srq; - ibdev->destroy_srq = hfi1_destroy_srq; - ibdev->post_srq_recv = hfi1_post_srq_receive; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index c845514..79bcab6 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -272,8 +272,6 @@ struct hfi1_ibdev { u64 n_kmem_wait; u64 n_send_schedule; - u32 n_srqs_allocated; /* number of SRQs allocated for device */ - spinlock_t n_srqs_lock; #ifdef CONFIG_DEBUG_FS /* per HFI debugfs */ struct dentry *hfi1_ibdev_dbg; @@ -431,21 +429,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet); int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey); -int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); - -struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); - -int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, - struct ib_udata *udata); - -int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); - -int hfi1_destroy_srq(struct ib_srq *ibsrq); - int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only); void hfi1_migrate_qp(struct rvt_qp *qp); -- cgit v0.10.2 From a9c05e350c17db98d82e8784ed0c05a78bd0169f Mon Sep 17 00:00:00 2001 From: Bryan Morgan Date: Wed, 3 Feb 2016 14:30:49 -0800 Subject: staging/rdma/hfi1: HFI reports wrong offline disabled reason when cable removed Removing QSFP cable should report 'No Local Media' 
instead of 'Transient' as reported by 'opaportinfo'. Workaround is to change the state to OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED in cable handler. With cable still removed, 'opaportinfo bounce' should not cause a state change to Polling, as reported by 'opaportinfo'. Resolution is to prevent physical state change from Offline->Polling. Use a macro to mask lower nibble of OPA_LINKDOWN_REASON* as needed for offline_disabled_reason. Reviewed-by: Mike Marciniszyn Reviewed-by: Easwar Hariharan Reviewed-by: Dean Luick Reported-by: Todd Rimmer Signed-off-by: Bryan Morgan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 93e152d..16e2ff2 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -5857,6 +5857,16 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, qsfp_int_mgmt); + + if ((ppd->offline_disabled_reason > + HFI1_ODR_MASK( + OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED)) || + (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))) + ppd->offline_disabled_reason = + HFI1_ODR_MASK( + OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED); + if (ppd->host_link_state == HLS_DN_POLL) { /* * The link is still in POLL. This means @@ -9615,9 +9625,10 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) ret); return -EINVAL; } - if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE) + if (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) ppd->offline_disabled_reason = - OPA_LINKDOWN_REASON_TRANSIENT; + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); } if (do_wait) { @@ -9972,7 +9983,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; } } - ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE; + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* * If an error occurred above, go back to offline. The * caller may reschedule another attempt. diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index e6a5fed..57014b0 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -99,6 +99,8 @@ extern unsigned long hfi1_cap_mask; #define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap)) #define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \ HFI1_CAP_MISC_MASK) +/* Offline Disabled Reason is 4-bits */ +#define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON) /* * Control context is always 0 and handles the error packets. 
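For illustration only (not part of the patch): given the "Offline Disabled Reason is 4-bits" note above, HFI1_ODR_MASK() keeps just the low nibble of an OPA_LINKDOWN_REASON* value, so stores into and comparisons against offline_disabled_reason use the same truncated encoding. A minimal sketch, assuming OPA_PI_MASK_OFFLINE_REASON is the low-nibble mask 0x0F as that comment implies:

	/* store the masked reason ... */
	ppd->offline_disabled_reason =
		HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);

	/* ... and compare against masked constants, never raw ones */
	if (ppd->offline_disabled_reason ==
	    HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
		ppd->offline_disabled_reason =
			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);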
diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c index 1283f2d..9adab86 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/staging/rdma/hfi1/intr.c @@ -152,7 +152,8 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) /* physical link went up */ ppd->linkup = 1; - ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE; + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* link widths are not available until the link is fully up */ get_linkup_link_widths(ppd); diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 9cadf77..303dfee 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -590,12 +590,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->port_states.ledenable_offlinereason |= ppd->is_sm_config_started << 5; pi->port_states.ledenable_offlinereason |= - ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON; + ppd->offline_disabled_reason; #else pi->port_states.offline_reason = ppd->neighbor_normal << 4; pi->port_states.offline_reason |= ppd->is_sm_config_started << 5; - pi->port_states.offline_reason |= ppd->offline_disabled_reason & - OPA_PI_MASK_OFFLINE_REASON; + pi->port_states.offline_reason |= ppd->offline_disabled_reason; #endif /* PI_LED_ENABLE_SUP */ pi->port_states.portphysstate_portstate = @@ -930,6 +929,14 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, return HFI_TRANSITION_IGNORED; /* + * A change request of Physical Port State from + * 'Offline' to 'Polling' should be ignored. + */ + if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) && + (physical_new == IB_PORTPHYSSTATE_POLLING)) + return HFI_TRANSITION_IGNORED; + + /* * Either physical_allowed or logical_allowed is * HFI_TRANSITION_ALLOWED. */ @@ -993,11 +1000,11 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, set_link_state(ppd, link_state); if (link_state == HLS_DN_DISABLE && (ppd->offline_disabled_reason > - OPA_LINKDOWN_REASON_SMA_DISABLED || + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) || ppd->offline_disabled_reason == - OPA_LINKDOWN_REASON_NONE)) + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))) ppd->offline_disabled_reason = - OPA_LINKDOWN_REASON_SMA_DISABLED; + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); /* * Don't send a reply if the response would be sent * through the disabled port. @@ -1710,12 +1717,11 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, psi->port_states.ledenable_offlinereason |= ppd->is_sm_config_started << 5; psi->port_states.ledenable_offlinereason |= - ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON; + ppd->offline_disabled_reason; #else psi->port_states.offline_reason = ppd->neighbor_normal << 4; psi->port_states.offline_reason |= ppd->is_sm_config_started << 5; - psi->port_states.offline_reason |= ppd->offline_disabled_reason & - OPA_PI_MASK_OFFLINE_REASON; + psi->port_states.offline_reason |= ppd->offline_disabled_reason; #endif /* PI_LED_ENABLE_SUP */ psi->port_states.portphysstate_portstate = -- cgit v0.10.2 From 76ef8c0798d3377fd58a1ef083d65b4528682db4 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:30:57 -0800 Subject: staging/rdma/hfi1: cleanup messages on qsfp_read() failure The ":" in "%s:" adds no value. 
Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index 6326a91..6e9c56f 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -330,48 +330,48 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) /* all */ ret = qsfp_read(ppd, target, 384, cache + 256, 128); if (ret <= 0 || ret != 128) { - dd_dev_info(ppd->dd, "%s: failed\n", __func__); + dd_dev_info(ppd->dd, "%s failed\n", __func__); goto bail; } ret = qsfp_read(ppd, target, 640, cache + 384, 128); if (ret <= 0 || ret != 128) { - dd_dev_info(ppd->dd, "%s: failed\n", __func__); + dd_dev_info(ppd->dd, "%s failed\n", __func__); goto bail; } ret = qsfp_read(ppd, target, 896, cache + 512, 128); if (ret <= 0 || ret != 128) { - dd_dev_info(ppd->dd, "%s: failed\n", __func__); + dd_dev_info(ppd->dd, "%s failed\n", __func__); goto bail; } } else if ((cache[195] & 0x80) == 0x80) { /* only page 2 and 3 */ ret = qsfp_read(ppd, target, 640, cache + 384, 128); if (ret <= 0 || ret != 128) { - dd_dev_info(ppd->dd, "%s: failed\n", __func__); + dd_dev_info(ppd->dd, "%s failed\n", __func__); goto bail; } ret = qsfp_read(ppd, target, 896, cache + 512, 128); if (ret <= 0 || ret != 128) { - dd_dev_info(ppd->dd, "%s: failed\n", __func__); + dd_dev_info(ppd->dd, "%s failed\n", __func__); goto bail; } } else if ((cache[195] & 0x40) == 0x40) { /* only page 1 and 3 */ ret = qsfp_read(ppd, target, 384, cache + 256, 128); if (ret <= 0 || ret != 128) { - dd_dev_info(ppd->dd, "%s: failed\n", __func__); + dd_dev_info(ppd->dd, "%s failed\n", __func__); goto bail; } ret = qsfp_read(ppd, target, 896, cache + 512, 128); if (ret <= 0 || ret != 128) { - dd_dev_info(ppd->dd, "%s: failed\n", __func__); + dd_dev_info(ppd->dd, "%s failed\n", __func__); goto bail; } } else { /* only page 3 */ ret = qsfp_read(ppd, target, 896, cache + 512, 128); if (ret <= 0 || ret != 128) { - dd_dev_info(ppd->dd, "%s: failed\n", __func__); + dd_dev_info(ppd->dd, "%s failed\n", __func__); goto bail; } } -- cgit v0.10.2 From c7cb7635d91d9126431159ee7f90b7137c908e89 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 3 Feb 2016 14:31:05 -0800 Subject: staging/rdma/hfi1: Fix QSFP memory read/write across 128 byte boundary The QSFP memory cache reads both lower and upper page 0H in one shot, which leads to the address counter wrapping around to the beginning of lower page 00H at byte 128, as defined by SFF-8636. This patch fixes this by modifying the underlying QSFP read and writes to avoid this wrap around. 
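For illustration, the truncation rule this patch applies reduces to roughly the sketch below, using the QSFP_RW_BOUNDARY constant the patch introduces; the real qsfp_read()/qsfp_write() loops in the diff also handle page selection and error returns:

	while (count < len) {
		int nread = len - count;

		/* never let a single transfer cross a 128-byte boundary */
		if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY)
			nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);

		/* issue the (at most) 128-byte i2c transfer here ... */
		addr += nread;
		count += nread;
	}

Keeping each transfer inside one 128-byte half-page avoids the SFF-8636 address-counter wrap-around described above.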
Reviewed-by: Dean Luick Reviewed-by: Ira Weiny Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index 6e9c56f..0d2ec97 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -186,6 +186,10 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, return ret; } +/* + * Write page n, offset m of QSFP memory as defined by SFF 8636 + * in the cache by writing @addr = ((256 * n) + m) + */ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) { @@ -217,15 +221,15 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, break; } - /* truncate write to end of page if crossing page boundary */ offset = addr % QSFP_PAGESIZE; nwrite = len - count; - if ((offset + nwrite) > QSFP_PAGESIZE) - nwrite = QSFP_PAGESIZE - offset; + /* truncate write to boundary if crossing boundary */ + if (((addr % QSFP_RW_BOUNDARY) + nwrite) > QSFP_RW_BOUNDARY) + nwrite = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY); ret = __i2c_write(ppd, target, QSFP_DEV, offset, bp + count, nwrite); - if (ret <= 0) /* stop on error or nothing read */ + if (ret <= 0) /* stop on error or nothing written */ break; count += ret; @@ -239,6 +243,10 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, return count; } +/* + * Access page n, offset m of QSFP memory as defined by SFF 8636 + * in the cache by reading @addr = ((256 * n) + m) + */ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) { @@ -269,11 +277,11 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, break; } - /* truncate read to end of page if crossing page boundary */ offset = addr % QSFP_PAGESIZE; nread = len - count; - if ((offset + nread) > QSFP_PAGESIZE) - nread = QSFP_PAGESIZE - offset; + /* truncate read to boundary if crossing boundary */ + if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY) + nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY); ret = __i2c_read(ppd, target, QSFP_DEV, offset, bp + count, nread); @@ -295,6 +303,11 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, * This function caches the QSFP memory range in 128 byte chunks. * As an example, the next byte after address 255 is byte 128 from * upper page 01H (if existing) rather than byte 0 from lower page 00H. 
+ * Access page n, offset m of QSFP memory as defined by SFF 8636 + * in the cache by reading byte ((128 * n) + m) + * The calls to qsfp_{read,write} in this function correctly handle the + * address map difference between this mapping and the mapping implemented + * by those functions */ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) { @@ -305,23 +318,24 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) /* ensure sane contents on invalid reads, for cable swaps */ memset(cache, 0, (QSFP_MAX_NUM_PAGES*128)); - dd_dev_info(ppd->dd, "%s: called\n", __func__); + spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); + ppd->qsfp_info.cache_valid = 0; + spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); + + dd_dev_info(ppd->dd, "%s called\n", __func__); if (!qsfp_mod_present(ppd)) { ret = -ENODEV; goto bail; } - ret = qsfp_read(ppd, target, 0, cache, 256); - if (ret != 256) { + ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE); + if (ret != QSFP_PAGESIZE) { dd_dev_info(ppd->dd, - "%s: Read of pages 00H failed, expected 256, got %d\n", - __func__, ret); + "%s: Page 0 read failed, expected %d, got %d\n", + __func__, QSFP_PAGESIZE, ret); goto bail; } - if (cache[0] != 0x0C && cache[0] != 0x0D) - goto bail; - /* Is paging enabled? */ if (!(cache[2] & 4)) { diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h index 16aebdc..3422250 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/staging/rdma/hfi1/qsfp.h @@ -67,15 +67,16 @@ /* QSFP is paged at 256 bytes */ #define QSFP_PAGESIZE 256 +/* Reads/writes cannot cross 128 byte boundaries */ +#define QSFP_RW_BOUNDARY 128 /* Defined fields that Intel requires of qualified cables */ /* Byte 0 is Identifier, not checked */ /* Byte 1 is reserved "status MSB" */ -/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */ -/* - * Rest of first 128 not used, although 127 is reserved for page select - * if module is not "Flat memory". - */ +#define QSFP_TX_CTRL_BYTE_OFFS 86 +#define QSFP_PWR_CTRL_BYTE_OFFS 93 +#define QSFP_CDR_CTRL_BYTE_OFFS 98 + #define QSFP_PAGE_SELECT_BYTE_OFFS 127 /* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */ #define QSFP_MOD_ID_OFFS 128 @@ -87,7 +88,8 @@ /* Byte 130 is Connector type. Not Intel req'd */ /* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */ /* Byte 139 is encoding. code 0x01 is 8b10b. Not Intel req'd */ -/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not Intel req'd */ +/* byte 140 is nominal bit-rate, in units of 100Mbits/sec */ +#define QSFP_NOM_BIT_RATE_100_OFFS 140 /* Byte 141 is Extended Rate Select. Not Intel req'd */ /* Bytes 142..145 are lengths for various fiber types. Not Intel req'd */ /* Byte 146 is length for Copper. Units of 1 meter */ @@ -135,11 +137,18 @@ extern const char *const hfi1_qsfp_devtech[16]; */ #define QSFP_ATTEN_OFFS 186 #define QSFP_ATTEN_LEN 2 -/* Bytes 188,189 are Wavelength tolerance, not Intel req'd */ +/* + * Bytes 188,189 are Wavelength tolerance, if optical + * If copper, they are attenuation in dB: + * Byte 188 is at 12.5 Gb/s, Byte 189 at 25 Gb/s + */ +#define QSFP_CU_ATTEN_7G_OFFS 188 +#define QSFP_CU_ATTEN_12G_OFFS 189 /* Byte 190 is Max Case Temp. Not Intel req'd */ /* Byte 191 is LSB of sum of bytes 128..190. Not Intel req'd */ #define QSFP_CC_OFFS 191 -/* Bytes 192..195 are Options implemented in qsfp. 
Not Intel req'd */ +#define QSFP_EQ_INFO_OFFS 193 +#define QSFP_CDR_INFO_OFFS 194 /* Bytes 196..211 are Serial Number, String */ #define QSFP_SN_OFFS 196 #define QSFP_SN_LEN 16 @@ -150,6 +159,8 @@ extern const char *const hfi1_qsfp_devtech[16]; #define QSFP_LOT_OFFS 218 #define QSFP_LOT_LEN 2 /* Bytes 220, 221 indicate monitoring options, Not Intel req'd */ +/* Byte 222 indicates nominal bitrate in units of 250Mbits/sec */ +#define QSFP_NOM_BIT_RATE_250_OFFS 222 /* Byte 223 is LSB of sum of bytes 192..222 */ #define QSFP_CC_EXT_OFFS 223 @@ -191,6 +202,7 @@ extern const char *const hfi1_qsfp_devtech[16]; */ #define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3) +#define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4) #define QSFP_ATTEN_SDR(attenarray) (attenarray[0]) #define QSFP_ATTEN_DDR(attenarray) (attenarray[1]) -- cgit v0.10.2 From 8ebd4cf1852afb56773ce8818da22157bfffa900 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:31:14 -0800 Subject: staging/rdma/hfi1: Add active and optical cable support This patch qualifies and tunes active and optical cables for optimal bit error rate and signal integrity settings. These settings are fetched from the platform configuration data. Based on attributes of the QSFP cable as read from the SFF-8636 compliant memory map, we select the appropriate settings from the platform configuration data (examples: TX/RX equalization, enabling cable high power, enabling TX/RX clock data recovery mechanisms, and RX amplitude control) and apply them to the SERDES and QSFP cable. The platform configuration data also contains system parameters such as maximum power dissipation supported, and the cables are qualified based on these parameters. As part of qualifying the cables, the correct OfflineDisabledReasons are set for the appropriate scenarios. 
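In outline, the cable-dependent tuning added by this patch looks roughly like the sketch below (condensed from tune_qsfp() in platform.c further down; error handling and the attenuation arithmetic are omitted). The technology nibble comes from the cached SFF-8636 memory map:

	switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
	case 0xA ... 0xB:	/* passive copper: sum local/cable/remote attenuation */
		*ptr_tuning_method = OPA_PASSIVE_TUNING;
		break;
	case 0x0 ... 0x9:	/* active or optical: qualify power and bitrate, then */
	case 0xC:		/* apply CDR, equalization and RX amplitude settings */
	case 0xE:		/* taken from the platform configuration data */
		ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
				       ptr_total_atten);
		*ptr_tuning_method = OPA_ACTIVE_TUNING;
		break;
	default:		/* unknown/unsupported cable */
		break;
	}

The passive path only programs channel loss into the SERDES, while the active path also writes CDR/EQ/amplitude control bytes into the QSFP via qsfp_write().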
Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Reviewed-by: Brent R Rothermel Signed-off-by: Easwar Hariharan Signed-off-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index ca2dea5..9b3f7e9 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := chip.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ - init.o intr.o mad.o pcie.o pio.o pio_copy.o \ + init.o intr.o mad.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 16e2ff2..4d70a96 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -64,6 +64,7 @@ #include "sdma.h" #include "eprom.h" #include "efivar.h" +#include "platform.h" #define NUM_IB_PORTS 1 @@ -5826,7 +5827,7 @@ static void is_various_int(struct hfi1_devdata *dd, unsigned int source) static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) { - /* source is always zero */ + /* src_ctx is always zero */ struct hfi1_pportdata *ppd = dd->pport; unsigned long flags; u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); @@ -5849,14 +5850,13 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) * an interrupt when a cable is inserted */ ppd->qsfp_info.cache_valid = 0; - ppd->qsfp_info.qsfp_interrupt_functional = 0; + ppd->qsfp_info.reset_needed = 0; + ppd->qsfp_info.limiting_active = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); - write_csr(dd, - dd->hfi1_id ? - ASIC_QSFP2_INVERT : - ASIC_QSFP1_INVERT, - qsfp_int_mgmt); + /* Invert the ModPresent pin now to detect plug-in */ + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT : + ASIC_QSFP1_INVERT, qsfp_int_mgmt); if ((ppd->offline_disabled_reason > HFI1_ODR_MASK( @@ -5883,12 +5883,16 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); + /* + * Stop inversion of ModPresent pin to detect + * removal of the cable + */ qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N; - write_csr(dd, - dd->hfi1_id ? - ASIC_QSFP2_INVERT : - ASIC_QSFP1_INVERT, - qsfp_int_mgmt); + write_csr(dd, dd->hfi1_id ? 
ASIC_QSFP2_INVERT : + ASIC_QSFP1_INVERT, qsfp_int_mgmt); + + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); } } @@ -5898,7 +5902,6 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) __func__); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.check_interrupt_flags = 1; - ppd->qsfp_info.qsfp_interrupt_functional = 1; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); } @@ -6666,6 +6669,7 @@ void handle_link_up(struct work_struct *work) set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, OPA_LINKDOWN_REASON_SPEED_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); + tune_serdes(ppd); start_link(ppd); } } @@ -6691,7 +6695,13 @@ void handle_link_down(struct work_struct *work) struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, link_down_work); - /* go offline first, then deal with reasons */ + if ((ppd->host_link_state & + (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) && + ppd->port_type == PORT_TYPE_FIXED) + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED); + + /* Go offline first, then deal with reading/writing through 8051 */ set_link_state(ppd, HLS_DN_OFFLINE); lcl_reason = 0; @@ -6713,10 +6723,12 @@ void handle_link_down(struct work_struct *work) /* If there is no cable attached, turn the DC off. Otherwise, * start the link bring up. */ - if (!qsfp_mod_present(ppd)) + if (!qsfp_mod_present(ppd)) { dc_shutdown(ppd->dd); - else + } else { + tune_serdes(ppd); start_link(ppd); + } } void handle_link_bounce(struct work_struct *work) @@ -6729,6 +6741,7 @@ void handle_link_bounce(struct work_struct *work) */ if (ppd->host_link_state & HLS_UP) { set_link_state(ppd, HLS_DN_OFFLINE); + tune_serdes(ppd); start_link(ppd); } else { dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n", @@ -7237,6 +7250,7 @@ done: set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0, OPA_LINKDOWN_REASON_WIDTH_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); + tune_serdes(ppd); start_link(ppd); } } @@ -8235,8 +8249,8 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state) return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); } -static int load_8051_config(struct hfi1_devdata *dd, u8 field_id, - u8 lane_id, u32 config_data) +int load_8051_config(struct hfi1_devdata *dd, u8 field_id, + u8 lane_id, u32 config_data) { u64 data; int ret; @@ -8258,8 +8272,8 @@ static int load_8051_config(struct hfi1_devdata *dd, u8 field_id, * set the result, even on error. * Return 0 on success, -errno on failure */ -static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id, - u32 *result) +int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id, + u32 *result) { u64 big_data; u32 addr; @@ -8881,32 +8895,80 @@ int start_link(struct hfi1_pportdata *ppd) return -EAGAIN; } -static void reset_qsfp(struct hfi1_pportdata *ppd) +static void wait_for_qsfp_init(struct hfi1_pportdata *ppd) +{ + struct hfi1_devdata *dd = ppd->dd; + u64 mask; + unsigned long timeout; + + /* + * Check for QSFP interrupt for t_init (SFF 8679) + */ + timeout = jiffies + msecs_to_jiffies(2000); + while (1) { + mask = read_csr(dd, dd->hfi1_id ? + ASIC_QSFP2_IN : ASIC_QSFP1_IN); + if (!(mask & QSFP_HFI0_INT_N)) { + write_csr(dd, dd->hfi1_id ? 
ASIC_QSFP2_CLEAR : + ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N); + break; + } + if (time_after(jiffies, timeout)) { + dd_dev_info(dd, "%s: No IntN detected, reset complete\n", + __func__); + break; + } + udelay(2); + } +} + +static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable) +{ + struct hfi1_devdata *dd = ppd->dd; + u64 mask; + + mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK); + if (enable) + mask |= (u64)QSFP_HFI0_INT_N; + else + mask &= ~(u64)QSFP_HFI0_INT_N; + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask); +} + +void reset_qsfp(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; u64 mask, qsfp_mask; + /* Disable INT_N from triggering QSFP interrupts */ + set_qsfp_int_n(ppd, 0); + + /* Reset the QSFP */ mask = (u64)QSFP_HFI0_RESET_N; - qsfp_mask = read_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE); + qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE); qsfp_mask |= mask; write_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, - qsfp_mask); + dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask); - qsfp_mask = read_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); + qsfp_mask = read_csr(dd, dd->hfi1_id ? + ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); qsfp_mask &= ~mask; write_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, - qsfp_mask); + dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask); udelay(10); qsfp_mask |= mask; write_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, - qsfp_mask); + dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask); + + wait_for_qsfp_init(ppd); + + /* + * Allow INT_N to trigger the QSFP interrupt to watch + * for alarms and warnings + */ + set_qsfp_int_n(ppd, 1); } static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, @@ -9018,35 +9080,8 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, return 0; } -static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd) -{ - refresh_qsfp_cache(ppd, &ppd->qsfp_info); - - return 0; -} - -static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd) -{ - struct hfi1_devdata *dd = ppd->dd; - u8 qsfp_interrupt_status = 0; - - if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1) - != 1) { - dd_dev_info(dd, - "%s: Failed to read status of QSFP module\n", - __func__); - return -EIO; - } - - /* We don't care about alarms & warnings with a non-functional INT_N */ - if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY)) - do_pre_lni_host_behaviors(ppd); - - return 0; -} - /* This routine will only be scheduled if the QSFP module is present */ -static void qsfp_event(struct work_struct *work) +void qsfp_event(struct work_struct *work) { struct qsfp_data *qd; struct hfi1_pportdata *ppd; @@ -9068,20 +9103,20 @@ static void qsfp_event(struct work_struct *work) dc_start(dd); if (qd->cache_refresh_required) { - msleep(3000); - reset_qsfp(ppd); - /* Check for QSFP interrupt after t_init (SFF 8679) - * + extra + set_qsfp_int_n(ppd, 0); + + wait_for_qsfp_init(ppd); + + /* + * Allow INT_N to trigger the QSFP interrupt to watch + * for alarms and warnings */ - msleep(3000); - if (!qd->qsfp_interrupt_functional) { - if (do_qsfp_intr_fallback(ppd) < 0) - dd_dev_info(dd, "%s: QSFP fallback failed\n", - __func__); - ppd->driver_link_ready = 1; - start_link(ppd); - } + set_qsfp_int_n(ppd, 1); + + tune_serdes(ppd); + + start_link(ppd); } if (qd->check_interrupt_flags) { @@ -9094,50 +9129,50 @@ static void qsfp_event(struct work_struct *work) __func__); } else { unsigned long flags; - 
u8 data_status; + handle_qsfp_error_conditions( + ppd, qsfp_interrupt_status); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.check_interrupt_flags = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); - - if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1) - != 1) { - dd_dev_info(dd, - "%s: Failed to read status of QSFP module\n", - __func__); - } - if (!(data_status & QSFP_DATA_NOT_READY)) { - do_pre_lni_host_behaviors(ppd); - start_link(ppd); - } else - handle_qsfp_error_conditions(ppd, - qsfp_interrupt_status); } } } -void init_qsfp(struct hfi1_pportdata *ppd) +static void init_qsfp_int(struct hfi1_devdata *dd) { - struct hfi1_devdata *dd = ppd->dd; - u64 qsfp_mask; + struct hfi1_pportdata *ppd = dd->pport; + u64 qsfp_mask, cce_int_mask; + const int qsfp1_int_smask = QSFP1_INT % 64; + const int qsfp2_int_smask = QSFP2_INT % 64; - if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB || - ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { - ppd->driver_link_ready = 1; - return; + /* + * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0 + * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR, + * therefore just one of QSFP1_INT/QSFP2_INT can be used to find + * the index of the appropriate CSR in the CCEIntMask CSR array + */ + cce_int_mask = read_csr(dd, CCE_INT_MASK + + (8 * (QSFP1_INT / 64))); + if (dd->hfi1_id) { + cce_int_mask &= ~((u64)1 << qsfp1_int_smask); + write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)), + cce_int_mask); + } else { + cce_int_mask &= ~((u64)1 << qsfp2_int_smask); + write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)), + cce_int_mask); } - ppd->qsfp_info.ppd = ppd; - INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); - qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); /* Clear current status to avoid spurious interrupts */ - write_csr(dd, - dd->hfi1_id ? - ASIC_QSFP2_CLEAR : - ASIC_QSFP1_CLEAR, - qsfp_mask); + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR, + qsfp_mask); + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, + qsfp_mask); + + set_qsfp_int_n(ppd, 0); /* Handle active low nature of INT_N and MODPRST_N pins */ if (qsfp_mod_present(ppd)) @@ -9145,29 +9180,6 @@ void init_qsfp(struct hfi1_pportdata *ppd) write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, qsfp_mask); - - /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */ - qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N; - write_csr(dd, - dd->hfi1_id ? 
ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, - qsfp_mask); - - if (qsfp_mod_present(ppd)) { - msleep(3000); - reset_qsfp(ppd); - - /* Check for QSFP interrupt after t_init (SFF 8679) - * + extra - */ - msleep(3000); - if (!ppd->qsfp_info.qsfp_interrupt_functional) { - if (do_qsfp_intr_fallback(ppd) < 0) - dd_dev_info(dd, - "%s: QSFP fallback failed\n", - __func__); - ppd->driver_link_ready = 1; - } - } } /* @@ -9203,8 +9215,6 @@ int bringup_serdes(struct hfi1_pportdata *ppd) ppd->guid = guid; } - /* the link defaults to enabled */ - ppd->link_enabled = 1; /* Set linkinit_reason on power up per OPA spec */ ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP; @@ -9217,6 +9227,12 @@ int bringup_serdes(struct hfi1_pportdata *ppd) return ret; } + /* tune the SERDES to a ballpark setting for + * optimal signal and bit error rate + * Needs to be done before starting the link + */ + tune_serdes(ppd); + return start_link(ppd); } @@ -9234,6 +9250,8 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) ppd->driver_link_ready = 0; ppd->link_enabled = 0; + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0, OPA_LINKDOWN_REASON_SMA_DISABLED); set_link_state(ppd, HLS_DN_OFFLINE); @@ -9649,6 +9667,12 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */ ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */ + if (ppd->port_type == PORT_TYPE_QSFP && + ppd->qsfp_info.limiting_active && + qsfp_mod_present(ppd)) { + set_qsfp_tx(ppd, 0); + } + /* * The LNI has a mandatory wait time after the physical state * moves to Offline.Quiet. The wait time may be different @@ -12078,31 +12102,11 @@ void set_intr_state(struct hfi1_devdata *dd, u32 enable) * In HFI, the mask needs to be 1 to allow interrupts. 
*/ if (enable) { - u64 cce_int_mask; - const int qsfp1_int_smask = QSFP1_INT % 64; - const int qsfp2_int_smask = QSFP2_INT % 64; - /* enable all interrupts */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0); - /* - * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0 - * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR, - * therefore just one of QSFP1_INT/QSFP2_INT can be used to find - * the index of the appropriate CSR in the CCEIntMask CSR array - */ - cce_int_mask = read_csr(dd, CCE_INT_MASK + - (8*(QSFP1_INT/64))); - if (dd->hfi1_id) { - cce_int_mask &= ~((u64)1 << qsfp1_int_smask); - write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)), - cce_int_mask); - } else { - cce_int_mask &= ~((u64)1 << qsfp2_int_smask); - write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)), - cce_int_mask); - } + init_qsfp_int(dd); } else { for (i = 0; i < CCE_NUM_INT_CSRS; i++) write_csr(dd, CCE_INT_MASK + (8*i), 0ull); diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 78ba425..5e4fe43 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -371,6 +371,9 @@ #define NUM_LANE_FIELDS 0x8 /* 8051 general register Field IDs */ +#define LINK_OPTIMIZATION_SETTINGS 0x00 +#define LINK_TUNING_PARAMETERS 0x02 +#define DC_HOST_COMM_SETTINGS 0x03 #define TX_SETTINGS 0x06 #define VERIFY_CAP_LOCAL_PHY 0x07 #define VERIFY_CAP_LOCAL_FABRIC 0x08 @@ -647,10 +650,13 @@ void handle_link_down(struct work_struct *work); void handle_link_downgrade(struct work_struct *work); void handle_link_bounce(struct work_struct *work); void handle_sma_message(struct work_struct *work); +void reset_qsfp(struct hfi1_pportdata *ppd); +void qsfp_event(struct work_struct *work); void start_freeze_handling(struct hfi1_pportdata *ppd, int flags); int send_idle_sma(struct hfi1_devdata *dd, u64 message); +int load_8051_config(struct hfi1_devdata *, u8, u8, u32); +int read_8051_config(struct hfi1_devdata *, u8, u8, u32 *); int start_link(struct hfi1_pportdata *ppd); -void init_qsfp(struct hfi1_pportdata *ppd); int bringup_serdes(struct hfi1_pportdata *ppd); void set_intr_state(struct hfi1_devdata *dd, u32 enable); void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 57014b0..f79b070 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -74,7 +74,7 @@ #include "chip.h" #include "mad.h" #include "qsfp.h" -#include "platform_config.h" +#include "platform.h" /* bumped 1 from s/w major version of TrueScale */ #define HFI1_CHIP_VERS_MAJ 3U @@ -563,7 +563,8 @@ struct hfi1_pportdata { struct kobject sl2sc_kobj; struct kobject vl2mtu_kobj; - /* QSFP support */ + /* PHY support */ + u32 port_type; struct qsfp_data qsfp_info; /* GUID for this interface, in host order */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 7def3f3..354935f 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -500,10 +500,13 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->sma_message_work, handle_sma_message); INIT_WORK(&ppd->link_bounce_work, handle_link_bounce); INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work); + INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); + mutex_init(&ppd->hls_lock); spin_lock_init(&ppd->sdma_alllock); spin_lock_init(&ppd->qsfp_info.qsfp_lock); + ppd->qsfp_info.ppd = ppd; ppd->sm_trap_qp = 0x0; 
ppd->sa_qp = 0x1; @@ -781,13 +784,6 @@ done: for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; - /* initialize the qsfp if it exists - * Requires interrupts to be enabled so we are notified - * when the QSFP completes reset, and has - * to be done before bringing up the SERDES - */ - init_qsfp(ppd); - /* start the serdes - must be after interrupts are enabled so we are notified when the link goes up */ lastfail = bringup_serdes(ppd); diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c new file mode 100644 index 0000000..9aa5e06 --- /dev/null +++ b/drivers/staging/rdma/hfi1/platform.c @@ -0,0 +1,838 @@ +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include "hfi.h" + +int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) +{ + u8 tx_ctrl_byte = on ? 
0x0 : 0xF; + int ret = 0; + + ret = qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_TX_CTRL_BYTE_OFFS, + &tx_ctrl_byte, 1); + /* we expected 1, so consider 0 an error */ + if (ret == 0) + ret = -EIO; + else if (ret == 1) + ret = 0; + return ret; +} + +static int qual_power(struct hfi1_pportdata *ppd) +{ + u32 cable_power_class = 0, power_class_max = 0; + u8 *cache = ppd->qsfp_info.cache; + int ret = 0; + + ret = get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_SYSTEM_TABLE, 0, + SYSTEM_TABLE_QSFP_POWER_CLASS_MAX, &power_class_max, 4); + if (ret) + return ret; + + if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4) + cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]); + else + cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]); + + if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1)) + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY); + else if (cable_power_class > 4 && cable_power_class > (power_class_max)) + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY); + /* + * cable_power_class will never have value 4 as this simply + * means the high power settings are unused + */ + + if (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) { + dd_dev_info( + ppd->dd, + "%s: Port disabled due to system power restrictions\n", + __func__); + ret = -EPERM; + } + return ret; +} + +static int qual_bitrate(struct hfi1_pportdata *ppd) +{ + u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled; + u8 *cache = ppd->qsfp_info.cache; + + if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G) && + cache[QSFP_NOM_BIT_RATE_250_OFFS] < 0x64) + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY); + + if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G) && + cache[QSFP_NOM_BIT_RATE_100_OFFS] < 0x7D) + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY); + + if (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) { + dd_dev_info( + ppd->dd, + "%s: Cable failed bitrate check, disabling port\n", + __func__); + return -EPERM; + } + return 0; +} + +static int set_qsfp_high_power(struct hfi1_pportdata *ppd) +{ + u8 cable_power_class = 0, power_ctrl_byte = 0; + u8 *cache = ppd->qsfp_info.cache; + int ret; + + if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4) + cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]); + else + cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]); + + if (cable_power_class) { + power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS]; + + power_ctrl_byte |= 1; + power_ctrl_byte &= ~(0x2); + + ret = qsfp_write(ppd, ppd->dd->hfi1_id, + QSFP_PWR_CTRL_BYTE_OFFS, + &power_ctrl_byte, 1); + if (ret != 1) + return -EIO; + + if (cable_power_class > 3) { + /* > power class 4*/ + power_ctrl_byte |= (1 << 2); + ret = qsfp_write(ppd, ppd->dd->hfi1_id, + QSFP_PWR_CTRL_BYTE_OFFS, + &power_ctrl_byte, 1); + if (ret != 1) + return -EIO; + } + + /* SFF 8679 rev 1.7 LPMode Deassert time */ + msleep(300); + } + return 0; +} + +static void apply_rx_cdr(struct hfi1_pportdata *ppd, + u32 rx_preset_index, + u8 *cdr_ctrl_byte) +{ + u32 rx_preset; + u8 *cache = ppd->qsfp_info.cache; + + if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) && + (cache[QSFP_CDR_INFO_OFFS] & 0x40))) + return; + + /* rx_preset preset to zero to catch error */ + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE, + rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY, + &rx_preset, 4); + + 
if (!rx_preset) { + dd_dev_info( + ppd->dd, + "%s: RX_CDR_APPLY is set to disabled\n", + __func__); + return; + } + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE, + rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR, + &rx_preset, 4); + + /* Expand cdr setting to all 4 lanes */ + rx_preset = (rx_preset | (rx_preset << 1) | + (rx_preset << 2) | (rx_preset << 3)); + + if (rx_preset) { + *cdr_ctrl_byte |= rx_preset; + } else { + *cdr_ctrl_byte &= rx_preset; + /* Preserve current TX CDR status */ + *cdr_ctrl_byte |= (cache[QSFP_CDR_CTRL_BYTE_OFFS] & 0xF0); + } +} + +static void apply_tx_cdr(struct hfi1_pportdata *ppd, + u32 tx_preset_index, + u8 *ctr_ctrl_byte) +{ + u32 tx_preset; + u8 *cache = ppd->qsfp_info.cache; + + if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) && + (cache[QSFP_CDR_INFO_OFFS] & 0x80))) + return; + + get_platform_config_field( + ppd->dd, + PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index, + TX_PRESET_TABLE_QSFP_TX_CDR_APPLY, &tx_preset, 4); + + if (!tx_preset) { + dd_dev_info( + ppd->dd, + "%s: TX_CDR_APPLY is set to disabled\n", + __func__); + return; + } + get_platform_config_field( + ppd->dd, + PLATFORM_CONFIG_TX_PRESET_TABLE, + tx_preset_index, + TX_PRESET_TABLE_QSFP_TX_CDR, &tx_preset, 4); + + /* Expand cdr setting to all 4 lanes */ + tx_preset = (tx_preset | (tx_preset << 1) | + (tx_preset << 2) | (tx_preset << 3)); + + if (tx_preset) + *ctr_ctrl_byte |= (tx_preset << 4); + else + /* Preserve current/determined RX CDR status */ + *ctr_ctrl_byte &= ((tx_preset << 4) | 0xF); +} + +static void apply_cdr_settings( + struct hfi1_pportdata *ppd, u32 rx_preset_index, + u32 tx_preset_index) +{ + u8 *cache = ppd->qsfp_info.cache; + u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS]; + + apply_rx_cdr(ppd, rx_preset_index, &cdr_ctrl_byte); + + apply_tx_cdr(ppd, tx_preset_index, &cdr_ctrl_byte); + + qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS, + &cdr_ctrl_byte, 1); +} + +static void apply_tx_eq_auto(struct hfi1_pportdata *ppd) +{ + u8 *cache = ppd->qsfp_info.cache; + u8 tx_eq; + + if (!(cache[QSFP_EQ_INFO_OFFS] & 0x8)) + return; + /* Disable adaptive TX EQ if present */ + tx_eq = cache[(128 * 3) + 241]; + tx_eq &= 0xF0; + qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 241, &tx_eq, 1); +} + +static void apply_tx_eq_prog(struct hfi1_pportdata *ppd, u32 tx_preset_index) +{ + u8 *cache = ppd->qsfp_info.cache; + u32 tx_preset; + u8 tx_eq; + + if (!(cache[QSFP_EQ_INFO_OFFS] & 0x4)) + return; + + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, + tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ_APPLY, + &tx_preset, 4); + if (!tx_preset) { + dd_dev_info( + ppd->dd, + "%s: TX_EQ_APPLY is set to disabled\n", + __func__); + return; + } + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, + tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ, + &tx_preset, 4); + + if (((cache[(128 * 3) + 224] & 0xF0) >> 4) < tx_preset) { + dd_dev_info( + ppd->dd, + "%s: TX EQ %x unsupported\n", + __func__, tx_preset); + + dd_dev_info( + ppd->dd, + "%s: Applying EQ %x\n", + __func__, cache[608] & 0xF0); + + tx_preset = (cache[608] & 0xF0) >> 4; + } + + tx_eq = tx_preset | (tx_preset << 4); + qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 234, &tx_eq, 1); + qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 235, &tx_eq, 1); +} + +static void apply_rx_eq_emp(struct hfi1_pportdata *ppd, u32 rx_preset_index) +{ + u32 rx_preset; + u8 rx_eq, *cache = ppd->qsfp_info.cache; + + if (!(cache[QSFP_EQ_INFO_OFFS] & 0x2)) + return; + get_platform_config_field( + ppd->dd, 
PLATFORM_CONFIG_RX_PRESET_TABLE, + rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP_APPLY, + &rx_preset, 4); + + if (!rx_preset) { + dd_dev_info( + ppd->dd, + "%s: RX_EMP_APPLY is set to disabled\n", + __func__); + return; + } + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE, + rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP, + &rx_preset, 4); + + if ((cache[(128 * 3) + 224] & 0xF) < rx_preset) { + dd_dev_info( + ppd->dd, + "%s: Requested RX EMP %x\n", + __func__, rx_preset); + + dd_dev_info( + ppd->dd, + "%s: Applying supported EMP %x\n", + __func__, cache[608] & 0xF); + + rx_preset = cache[608] & 0xF; + } + + rx_eq = rx_preset | (rx_preset << 4); + + qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 236, &rx_eq, 1); + qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 237, &rx_eq, 1); +} + +static void apply_eq_settings(struct hfi1_pportdata *ppd, + u32 rx_preset_index, u32 tx_preset_index) +{ + u8 *cache = ppd->qsfp_info.cache; + + /* no point going on w/o a page 3 */ + if (cache[2] & 4) { + dd_dev_info(ppd->dd, + "%s: Upper page 03 not present\n", + __func__); + return; + } + + apply_tx_eq_auto(ppd); + + apply_tx_eq_prog(ppd, tx_preset_index); + + apply_rx_eq_emp(ppd, rx_preset_index); +} + +static void apply_rx_amplitude_settings( + struct hfi1_pportdata *ppd, u32 rx_preset_index, + u32 tx_preset_index) +{ + u32 rx_preset; + u8 rx_amp = 0, i = 0, preferred = 0, *cache = ppd->qsfp_info.cache; + + /* no point going on w/o a page 3 */ + if (cache[2] & 4) { + dd_dev_info(ppd->dd, + "%s: Upper page 03 not present\n", + __func__); + return; + } + if (!(cache[QSFP_EQ_INFO_OFFS] & 0x1)) { + dd_dev_info(ppd->dd, + "%s: RX_AMP_APPLY is set to disabled\n", + __func__); + return; + } + + get_platform_config_field(ppd->dd, + PLATFORM_CONFIG_RX_PRESET_TABLE, + rx_preset_index, + RX_PRESET_TABLE_QSFP_RX_AMP_APPLY, + &rx_preset, 4); + + if (!rx_preset) { + dd_dev_info(ppd->dd, + "%s: RX_AMP_APPLY is set to disabled\n", + __func__); + return; + } + get_platform_config_field(ppd->dd, + PLATFORM_CONFIG_RX_PRESET_TABLE, + rx_preset_index, + RX_PRESET_TABLE_QSFP_RX_AMP, + &rx_preset, 4); + + dd_dev_info(ppd->dd, + "%s: Requested RX AMP %x\n", + __func__, + rx_preset); + + for (i = 0; i < 4; i++) { + if (cache[(128 * 3) + 225] & (1 << i)) { + preferred = i; + if (preferred == rx_preset) + break; + } + } + + /* + * Verify that preferred RX amplitude is not just a + * fall through of the default + */ + if (!preferred && !(cache[(128 * 3) + 225] & 0x1)) { + dd_dev_info(ppd->dd, "No supported RX AMP, not applying\n"); + return; + } + + dd_dev_info(ppd->dd, + "%s: Applying RX AMP %x\n", __func__, preferred); + + rx_amp = preferred | (preferred << 4); + qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 238, &rx_amp, 1); + qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 239, &rx_amp, 1); +} + +#define OPA_INVALID_INDEX 0xFFF + +static void apply_tx_lanes(struct hfi1_pportdata *ppd, u32 config_data, + const char *message) +{ + u8 i; + int ret = HCMD_SUCCESS; + + for (i = 0; i < 4; i++) { + ret = load_8051_config(ppd->dd, 0, i, config_data); + if (ret != HCMD_SUCCESS) { + dd_dev_err( + ppd->dd, + "%s: %s for lane %u failed\n", + message, __func__, i); + } + } +} + +static void apply_tunings( + struct hfi1_pportdata *ppd, u32 tx_preset_index, + u8 tuning_method, u32 total_atten, u8 limiting_active) +{ + int ret = 0; + u32 config_data = 0, tx_preset = 0; + u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0; + u8 *cache = ppd->qsfp_info.cache; + + read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, + 
GENERAL_CONFIG, &config_data); + config_data |= limiting_active; + ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, + GENERAL_CONFIG, config_data); + if (ret != HCMD_SUCCESS) + dd_dev_err( + ppd->dd, + "%s: Failed to set enable external device config\n", + __func__); + + config_data = 0; /* re-init */ + read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, + &config_data); + config_data |= tuning_method; + ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, + config_data); + if (ret != HCMD_SUCCESS) + dd_dev_err(ppd->dd, "%s: Failed to set tuning method\n", + __func__); + + external_device_config = + ((cache[QSFP_MOD_PWR_OFFS] & 0x4) << 3) | + ((cache[QSFP_MOD_PWR_OFFS] & 0x8) << 2) | + ((cache[QSFP_EQ_INFO_OFFS] & 0x2) << 1) | + (cache[QSFP_EQ_INFO_OFFS] & 0x4); + + config_data = 0; /* re-init */ + read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, + &config_data); + config_data |= (external_device_config << 24); + ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, + config_data); + if (ret != HCMD_SUCCESS) + dd_dev_err( + ppd->dd, + "%s: Failed to set external device config parameters\n", + __func__); + + config_data = 0; /* re-init */ + read_8051_config(ppd->dd, TX_SETTINGS, GENERAL_CONFIG, &config_data); + if ((ppd->link_speed_supported & OPA_LINK_SPEED_25G) && + (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)) + config_data |= 0x02; + if ((ppd->link_speed_supported & OPA_LINK_SPEED_12_5G) && + (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)) + config_data |= 0x01; + ret = load_8051_config(ppd->dd, TX_SETTINGS, GENERAL_CONFIG, + config_data); + if (ret != HCMD_SUCCESS) + dd_dev_err( + ppd->dd, + "%s: Failed to set external device config parameters\n", + __func__); + + config_data = (total_atten << 8) | (total_atten); + + apply_tx_lanes(ppd, config_data, "Setting channel loss"); + + if (tx_preset_index == OPA_INVALID_INDEX) + return; + + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index, + TX_PRESET_TABLE_PRECUR, &tx_preset, 4); + precur = tx_preset; + + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, + tx_preset_index, TX_PRESET_TABLE_ATTN, &tx_preset, 4); + attn = tx_preset; + + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, + tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4); + postcur = tx_preset; + + config_data = precur | (attn << 8) | (postcur << 16); + + apply_tx_lanes(ppd, config_data, "Applying TX settings"); +} + +static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, + u32 *ptr_rx_preset, u32 *ptr_total_atten) +{ + int ret = 0; + u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled; + u8 *cache = ppd->qsfp_info.cache; + + ppd->qsfp_info.limiting_active = 1; + + ret = set_qsfp_tx(ppd, 0); + if (ret) + return ret; + + ret = qual_power(ppd); + if (ret) + return ret; + + ret = qual_bitrate(ppd); + if (ret) + return ret; + + if (ppd->qsfp_info.reset_needed) { + reset_qsfp(ppd); + ppd->qsfp_info.reset_needed = 0; + refresh_qsfp_cache(ppd, &ppd->qsfp_info); + } else { + ppd->qsfp_info.reset_needed = 1; + } + + ret = set_qsfp_high_power(ppd); + if (ret) + return ret; + + if (cache[QSFP_EQ_INFO_OFFS] & 0x4) { + ret = get_platform_config_field( + ppd->dd, + PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ, + ptr_tx_preset, 4); + if (ret) { + *ptr_tx_preset = OPA_INVALID_INDEX; + return ret; + } + } else { + ret = get_platform_config_field( + ppd->dd, + 
PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ, + ptr_tx_preset, 4); + if (ret) { + *ptr_tx_preset = OPA_INVALID_INDEX; + return ret; + } + } + + ret = get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4); + if (ret) { + *ptr_rx_preset = OPA_INVALID_INDEX; + return ret; + } + + if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G)) + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_LOCAL_ATTEN_25G, ptr_total_atten, 4); + else if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G)) + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_LOCAL_ATTEN_12G, ptr_total_atten, 4); + + apply_cdr_settings(ppd, *ptr_rx_preset, *ptr_tx_preset); + + apply_eq_settings(ppd, *ptr_rx_preset, *ptr_tx_preset); + + apply_rx_amplitude_settings(ppd, *ptr_rx_preset, *ptr_tx_preset); + + ret = set_qsfp_tx(ppd, 1); + return ret; +} + +static int tune_qsfp(struct hfi1_pportdata *ppd, + u32 *ptr_tx_preset, u32 *ptr_rx_preset, + u8 *ptr_tuning_method, u32 *ptr_total_atten) +{ + u32 cable_atten = 0, remote_atten = 0, platform_atten = 0; + u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled; + int ret = 0; + u8 *cache = ppd->qsfp_info.cache; + + switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { + case 0xA ... 0xB: + ret = get_platform_config_field( + ppd->dd, + PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_LOCAL_ATTEN_25G, + &platform_atten, 4); + if (ret) + return ret; + + if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G)) + cable_atten = cache[QSFP_CU_ATTEN_12G_OFFS]; + else if ((lss & OPA_LINK_SPEED_12_5G) && + (lse & OPA_LINK_SPEED_12_5G)) + cable_atten = cache[QSFP_CU_ATTEN_7G_OFFS]; + + /* Fallback to configured attenuation if cable memory is bad */ + if (cable_atten == 0 || cable_atten > 36) { + ret = get_platform_config_field( + ppd->dd, + PLATFORM_CONFIG_SYSTEM_TABLE, 0, + SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G, + &cable_atten, 4); + if (ret) + return ret; + } + + ret = get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4); + if (ret) + return ret; + + *ptr_total_atten = platform_atten + cable_atten + remote_atten; + + *ptr_tuning_method = OPA_PASSIVE_TUNING; + break; + case 0x0 ... 0x9: /* fallthrough */ + case 0xC: /* fallthrough */ + case 0xE: + ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset, + ptr_total_atten); + if (ret) + return ret; + + *ptr_tuning_method = OPA_ACTIVE_TUNING; + break; + case 0xD: /* fallthrough */ + case 0xF: + default: + dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n", + __func__); + break; + } + return ret; +} + +/* + * This function communicates its success or failure via ppd->driver_link_ready + * Thus, it depends on its association with start_link(...) which checks + * driver_link_ready before proceeding with the link negotiation and + * initialization process. 
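 *
 * Illustrative call order only (start_link() is the existing bring-up entry
 * point referenced above, not something added by this patch):
 *
 *	tune_serdes(ppd);
 *	start_link(ppd);	/* proceeds only if driver_link_ready is set */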
+ */ +void tune_serdes(struct hfi1_pportdata *ppd) +{ + int ret = 0; + u32 total_atten = 0; + u32 remote_atten = 0, platform_atten = 0; + u32 rx_preset_index, tx_preset_index; + u8 tuning_method = 0; + struct hfi1_devdata *dd = ppd->dd; + + rx_preset_index = OPA_INVALID_INDEX; + tx_preset_index = OPA_INVALID_INDEX; + + /* the link defaults to enabled */ + ppd->link_enabled = 1; + /* the driver link ready state defaults to not ready */ + ppd->driver_link_ready = 0; + ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); + + if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB || + ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR || + !dd->pcfg_cache.cache_valid) { + ppd->driver_link_ready = 1; + return; + } + + ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_PORT_TYPE, &ppd->port_type, + 4); + if (ret) + goto bail; + + switch (ppd->port_type) { + case PORT_TYPE_DISCONNECTED: + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED); + dd_dev_info(dd, "%s: Port disconnected, disabling port\n", + __func__); + goto bail; + case PORT_TYPE_FIXED: + /* platform_atten, remote_atten pre-zeroed to catch error */ + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_LOCAL_ATTEN_25G, &platform_atten, 4); + + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4); + + total_atten = platform_atten + remote_atten; + + tuning_method = OPA_PASSIVE_TUNING; + break; + case PORT_TYPE_VARIABLE: + if (qsfp_mod_present(ppd)) { + /* + * platform_atten, remote_atten pre-zeroed to + * catch error + */ + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_LOCAL_ATTEN_25G, + &platform_atten, 4); + + get_platform_config_field( + ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_REMOTE_ATTEN_25G, + &remote_atten, 4); + + total_atten = platform_atten + remote_atten; + + tuning_method = OPA_PASSIVE_TUNING; + } else + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG); + break; + case PORT_TYPE_QSFP: + if (qsfp_mod_present(ppd)) { + refresh_qsfp_cache(ppd, &ppd->qsfp_info); + + if (ppd->qsfp_info.cache_valid) { + ret = tune_qsfp(ppd, + &tx_preset_index, + &rx_preset_index, + &tuning_method, + &total_atten); + if (ret) + goto bail; + } else { + dd_dev_err(dd, + "%s: Reading QSFP memory failed\n", + __func__); + goto bail; + } + } else + ppd->offline_disabled_reason = + HFI1_ODR_MASK( + OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED); + break; + default: + dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); + break; + } + if (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) + apply_tunings(ppd, tx_preset_index, tuning_method, + total_atten, + ppd->qsfp_info.limiting_active); + + if (ppd->port_type == PORT_TYPE_QSFP) + refresh_qsfp_cache(ppd, &ppd->qsfp_info); + + ppd->driver_link_ready = 1; + + return; +bail: + ppd->driver_link_ready = 0; +} diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/staging/rdma/hfi1/platform.h new file mode 100644 index 0000000..5b53d71 --- /dev/null +++ b/drivers/staging/rdma/hfi1/platform.h @@ -0,0 +1,298 @@ +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
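 *
 * Note: the METADATA_TABLE_FIELD_* macros defined below pack a field's start
 * bit and width into a single 32-bit metadata word. An extraction sketch,
 * assuming that layout (not taken from the driver itself):
 *
 *	start = (meta >> METADATA_TABLE_FIELD_START_SHIFT) &
 *		((1 << METADATA_TABLE_FIELD_START_LEN_BITS) - 1);
 *	len   = (meta >> METADATA_TABLE_FIELD_LEN_SHIFT) &
 *		((1 << METADATA_TABLE_FIELD_LEN_LEN_BITS) - 1);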
+ * + */ +#ifndef __PLATFORM_H +#define __PLATFORM_H + +#define METADATA_TABLE_FIELD_START_SHIFT 0 +#define METADATA_TABLE_FIELD_START_LEN_BITS 15 +#define METADATA_TABLE_FIELD_LEN_SHIFT 16 +#define METADATA_TABLE_FIELD_LEN_LEN_BITS 16 + +/* Header structure */ +#define PLATFORM_CONFIG_HEADER_RECORD_IDX_SHIFT 0 +#define PLATFORM_CONFIG_HEADER_RECORD_IDX_LEN_BITS 6 +#define PLATFORM_CONFIG_HEADER_TABLE_LENGTH_SHIFT 16 +#define PLATFORM_CONFIG_HEADER_TABLE_LENGTH_LEN_BITS 12 +#define PLATFORM_CONFIG_HEADER_TABLE_TYPE_SHIFT 28 +#define PLATFORM_CONFIG_HEADER_TABLE_TYPE_LEN_BITS 4 + +enum platform_config_table_type_encoding { + PLATFORM_CONFIG_TABLE_RESERVED, + PLATFORM_CONFIG_SYSTEM_TABLE, + PLATFORM_CONFIG_PORT_TABLE, + PLATFORM_CONFIG_RX_PRESET_TABLE, + PLATFORM_CONFIG_TX_PRESET_TABLE, + PLATFORM_CONFIG_QSFP_ATTEN_TABLE, + PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE, + PLATFORM_CONFIG_TABLE_MAX +}; + +enum platform_config_system_table_fields { + SYSTEM_TABLE_RESERVED, + SYSTEM_TABLE_NODE_STRING, + SYSTEM_TABLE_SYSTEM_IMAGE_GUID, + SYSTEM_TABLE_NODE_GUID, + SYSTEM_TABLE_REVISION, + SYSTEM_TABLE_VENDOR_OUI, + SYSTEM_TABLE_META_VERSION, + SYSTEM_TABLE_DEVICE_ID, + SYSTEM_TABLE_PARTITION_ENFORCEMENT_CAP, + SYSTEM_TABLE_QSFP_POWER_CLASS_MAX, + SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_12G, + SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G, + SYSTEM_TABLE_VARIABLE_TABLE_ENTRIES_PER_PORT, + SYSTEM_TABLE_MAX +}; + +enum platform_config_port_table_fields { + PORT_TABLE_RESERVED, + PORT_TABLE_PORT_TYPE, + PORT_TABLE_LOCAL_ATTEN_12G, + PORT_TABLE_LOCAL_ATTEN_25G, + PORT_TABLE_LINK_SPEED_SUPPORTED, + PORT_TABLE_LINK_WIDTH_SUPPORTED, + PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED, + PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED, + PORT_TABLE_VL_CAP, + PORT_TABLE_MTU_CAP, + PORT_TABLE_TX_LANE_ENABLE_MASK, + PORT_TABLE_LOCAL_MAX_TIMEOUT, + PORT_TABLE_REMOTE_ATTEN_12G, + PORT_TABLE_REMOTE_ATTEN_25G, + PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ, + PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ, + PORT_TABLE_RX_PRESET_IDX, + PORT_TABLE_CABLE_REACH_CLASS, + PORT_TABLE_MAX +}; + +enum platform_config_rx_preset_table_fields { + RX_PRESET_TABLE_RESERVED, + RX_PRESET_TABLE_QSFP_RX_CDR_APPLY, + RX_PRESET_TABLE_QSFP_RX_EMP_APPLY, + RX_PRESET_TABLE_QSFP_RX_AMP_APPLY, + RX_PRESET_TABLE_QSFP_RX_CDR, + RX_PRESET_TABLE_QSFP_RX_EMP, + RX_PRESET_TABLE_QSFP_RX_AMP, + RX_PRESET_TABLE_MAX +}; + +enum platform_config_tx_preset_table_fields { + TX_PRESET_TABLE_RESERVED, + TX_PRESET_TABLE_PRECUR, + TX_PRESET_TABLE_ATTN, + TX_PRESET_TABLE_POSTCUR, + TX_PRESET_TABLE_QSFP_TX_CDR_APPLY, + TX_PRESET_TABLE_QSFP_TX_EQ_APPLY, + TX_PRESET_TABLE_QSFP_TX_CDR, + TX_PRESET_TABLE_QSFP_TX_EQ, + TX_PRESET_TABLE_MAX +}; + +enum platform_config_qsfp_attn_table_fields { + QSFP_ATTEN_TABLE_RESERVED, + QSFP_ATTEN_TABLE_TX_PRESET_IDX, + QSFP_ATTEN_TABLE_RX_PRESET_IDX, + QSFP_ATTEN_TABLE_MAX +}; + +enum platform_config_variable_settings_table_fields { + VARIABLE_SETTINGS_TABLE_RESERVED, + VARIABLE_SETTINGS_TABLE_TX_PRESET_IDX, + VARIABLE_SETTINGS_TABLE_RX_PRESET_IDX, + VARIABLE_SETTINGS_TABLE_MAX +}; + +struct platform_config_data { + u32 *table; + u32 *table_metadata; + u32 num_table; +}; + +/* + * This struct acts as a quick reference into the platform_data binary image + * and is populated by parse_platform_config(...) 
depending on the specific + * META_VERSION + */ +struct platform_config_cache { + u8 cache_valid; + struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX]; +}; + +static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { + 0, + SYSTEM_TABLE_MAX, + PORT_TABLE_MAX, + RX_PRESET_TABLE_MAX, + TX_PRESET_TABLE_MAX, + QSFP_ATTEN_TABLE_MAX, + VARIABLE_SETTINGS_TABLE_MAX +}; + +/* This section defines default values and encodings for the + * fields defined for each table above + */ + +/* + *===================================================== + * System table encodings + *==================================================== + */ +#define PLATFORM_CONFIG_MAGIC_NUM 0x3d4f5041 +#define PLATFORM_CONFIG_MAGIC_NUMBER_LEN 4 + +/* + * These power classes are the same as defined in SFF 8636 spec rev 2.4 + * describing byte 129 in table 6-16, except enumerated in a different order + */ +enum platform_config_qsfp_power_class_encoding { + QSFP_POWER_CLASS_1 = 1, + QSFP_POWER_CLASS_2, + QSFP_POWER_CLASS_3, + QSFP_POWER_CLASS_4, + QSFP_POWER_CLASS_5, + QSFP_POWER_CLASS_6, + QSFP_POWER_CLASS_7 +}; + +/* + *===================================================== + * Port table encodings + *==================================================== + */ +enum platform_config_port_type_encoding { + PORT_TYPE_UNKNOWN, + PORT_TYPE_DISCONNECTED, + PORT_TYPE_FIXED, + PORT_TYPE_VARIABLE, + PORT_TYPE_QSFP, + PORT_TYPE_MAX +}; + +enum platform_config_link_speed_supported_encoding { + LINK_SPEED_SUPP_12G = 1, + LINK_SPEED_SUPP_25G, + LINK_SPEED_SUPP_12G_25G, + LINK_SPEED_SUPP_MAX +}; + +/* + * This is a subset (not strict) of the link downgrades + * supported. The link downgrades supported are expected + * to be supplied to the driver by another entity such as + * the fabric manager + */ +enum platform_config_link_width_supported_encoding { + LINK_WIDTH_SUPP_1X = 1, + LINK_WIDTH_SUPP_2X, + LINK_WIDTH_SUPP_2X_1X, + LINK_WIDTH_SUPP_3X, + LINK_WIDTH_SUPP_3X_1X, + LINK_WIDTH_SUPP_3X_2X, + LINK_WIDTH_SUPP_3X_2X_1X, + LINK_WIDTH_SUPP_4X, + LINK_WIDTH_SUPP_4X_1X, + LINK_WIDTH_SUPP_4X_2X, + LINK_WIDTH_SUPP_4X_2X_1X, + LINK_WIDTH_SUPP_4X_3X, + LINK_WIDTH_SUPP_4X_3X_1X, + LINK_WIDTH_SUPP_4X_3X_2X, + LINK_WIDTH_SUPP_4X_3X_2X_1X, + LINK_WIDTH_SUPP_MAX +}; + +enum platform_config_virtual_lane_capability_encoding { + VL_CAP_VL0 = 1, + VL_CAP_VL0_1, + VL_CAP_VL0_2, + VL_CAP_VL0_3, + VL_CAP_VL0_4, + VL_CAP_VL0_5, + VL_CAP_VL0_6, + VL_CAP_VL0_7, + VL_CAP_VL0_8, + VL_CAP_VL0_9, + VL_CAP_VL0_10, + VL_CAP_VL0_11, + VL_CAP_VL0_12, + VL_CAP_VL0_13, + VL_CAP_VL0_14, + VL_CAP_MAX +}; + +/* Max MTU */ +enum platform_config_mtu_capability_encoding { + MTU_CAP_256 = 1, + MTU_CAP_512 = 2, + MTU_CAP_1024 = 3, + MTU_CAP_2048 = 4, + MTU_CAP_4096 = 5, + MTU_CAP_8192 = 6, + MTU_CAP_10240 = 7 +}; + +enum platform_config_local_max_timeout_encoding { + LOCAL_MAX_TIMEOUT_10_MS = 1, + LOCAL_MAX_TIMEOUT_100_MS, + LOCAL_MAX_TIMEOUT_1_S, + LOCAL_MAX_TIMEOUT_10_S, + LOCAL_MAX_TIMEOUT_100_S, + LOCAL_MAX_TIMEOUT_1000_S +}; + +enum link_tuning_encoding { + OPA_PASSIVE_TUNING, + OPA_ACTIVE_TUNING, + OPA_UNKNOWN_TUNING +}; + +int set_qsfp_tx(struct hfi1_pportdata *ppd, int on); +void tune_serdes(struct hfi1_pportdata *ppd); +#endif /*__PLATFORM_H*/ diff --git a/drivers/staging/rdma/hfi1/platform_config.h b/drivers/staging/rdma/hfi1/platform_config.h deleted file mode 100644 index 8a94a83..0000000 --- a/drivers/staging/rdma/hfi1/platform_config.h +++ /dev/null @@ -1,286 +0,0 @@ -/* - * - * This file is provided under a dual BSD/GPLv2 license. 
When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ -#ifndef __PLATFORM_CONFIG_H -#define __PLATFORM_CONFIG_H - -#define METADATA_TABLE_FIELD_START_SHIFT 0 -#define METADATA_TABLE_FIELD_START_LEN_BITS 15 -#define METADATA_TABLE_FIELD_LEN_SHIFT 16 -#define METADATA_TABLE_FIELD_LEN_LEN_BITS 16 - -/* Header structure */ -#define PLATFORM_CONFIG_HEADER_RECORD_IDX_SHIFT 0 -#define PLATFORM_CONFIG_HEADER_RECORD_IDX_LEN_BITS 6 -#define PLATFORM_CONFIG_HEADER_TABLE_LENGTH_SHIFT 16 -#define PLATFORM_CONFIG_HEADER_TABLE_LENGTH_LEN_BITS 12 -#define PLATFORM_CONFIG_HEADER_TABLE_TYPE_SHIFT 28 -#define PLATFORM_CONFIG_HEADER_TABLE_TYPE_LEN_BITS 4 - -enum platform_config_table_type_encoding { - PLATFORM_CONFIG_TABLE_RESERVED, - PLATFORM_CONFIG_SYSTEM_TABLE, - PLATFORM_CONFIG_PORT_TABLE, - PLATFORM_CONFIG_RX_PRESET_TABLE, - PLATFORM_CONFIG_TX_PRESET_TABLE, - PLATFORM_CONFIG_QSFP_ATTEN_TABLE, - PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE, - PLATFORM_CONFIG_TABLE_MAX -}; - -enum platform_config_system_table_fields { - SYSTEM_TABLE_RESERVED, - SYSTEM_TABLE_NODE_STRING, - SYSTEM_TABLE_SYSTEM_IMAGE_GUID, - SYSTEM_TABLE_NODE_GUID, - SYSTEM_TABLE_REVISION, - SYSTEM_TABLE_VENDOR_OUI, - SYSTEM_TABLE_META_VERSION, - SYSTEM_TABLE_DEVICE_ID, - SYSTEM_TABLE_PARTITION_ENFORCEMENT_CAP, - SYSTEM_TABLE_QSFP_POWER_CLASS_MAX, - SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_12G, - SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G, - SYSTEM_TABLE_VARIABLE_TABLE_ENTRIES_PER_PORT, - SYSTEM_TABLE_MAX -}; - -enum platform_config_port_table_fields { - PORT_TABLE_RESERVED, - PORT_TABLE_PORT_TYPE, - PORT_TABLE_ATTENUATION_12G, - PORT_TABLE_ATTENUATION_25G, - PORT_TABLE_LINK_SPEED_SUPPORTED, - PORT_TABLE_LINK_WIDTH_SUPPORTED, - PORT_TABLE_VL_CAP, - PORT_TABLE_MTU_CAP, - PORT_TABLE_TX_LANE_ENABLE_MASK, - PORT_TABLE_LOCAL_MAX_TIMEOUT, - PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED, - PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED, - PORT_TABLE_TX_PRESET_IDX_PASSIVE_CU, - PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ, - PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ, - PORT_TABLE_RX_PRESET_IDX, - PORT_TABLE_CABLE_REACH_CLASS, - PORT_TABLE_MAX -}; - -enum platform_config_rx_preset_table_fields { - RX_PRESET_TABLE_RESERVED, - RX_PRESET_TABLE_QSFP_RX_CDR_APPLY, - RX_PRESET_TABLE_QSFP_RX_EQ_APPLY, - RX_PRESET_TABLE_QSFP_RX_AMP_APPLY, - RX_PRESET_TABLE_QSFP_RX_CDR, - RX_PRESET_TABLE_QSFP_RX_EQ, - RX_PRESET_TABLE_QSFP_RX_AMP, - RX_PRESET_TABLE_MAX -}; - -enum platform_config_tx_preset_table_fields { - TX_PRESET_TABLE_RESERVED, - TX_PRESET_TABLE_PRECUR, - TX_PRESET_TABLE_ATTN, - TX_PRESET_TABLE_POSTCUR, - TX_PRESET_TABLE_QSFP_TX_CDR_APPLY, - TX_PRESET_TABLE_QSFP_TX_EQ_APPLY, - TX_PRESET_TABLE_QSFP_TX_CDR, - TX_PRESET_TABLE_QSFP_TX_EQ, - TX_PRESET_TABLE_MAX -}; - -enum platform_config_qsfp_attn_table_fields { - QSFP_ATTEN_TABLE_RESERVED, - QSFP_ATTEN_TABLE_TX_PRESET_IDX, - QSFP_ATTEN_TABLE_RX_PRESET_IDX, - QSFP_ATTEN_TABLE_MAX -}; - -enum platform_config_variable_settings_table_fields { - VARIABLE_SETTINGS_TABLE_RESERVED, - VARIABLE_SETTINGS_TABLE_TX_PRESET_IDX, - VARIABLE_SETTINGS_TABLE_RX_PRESET_IDX, - VARIABLE_SETTINGS_TABLE_MAX -}; - -struct platform_config_data { - u32 *table; - u32 *table_metadata; - u32 num_table; -}; - -/* - * This struct acts as a quick reference into the platform_data binary image - * and is populated by parse_platform_config(...) 
depending on the specific - * META_VERSION - */ -struct platform_config_cache { - u8 cache_valid; - struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX]; -}; - -static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { - 0, - SYSTEM_TABLE_MAX, - PORT_TABLE_MAX, - RX_PRESET_TABLE_MAX, - TX_PRESET_TABLE_MAX, - QSFP_ATTEN_TABLE_MAX, - VARIABLE_SETTINGS_TABLE_MAX -}; - -/* This section defines default values and encodings for the - * fields defined for each table above - */ - -/*===================================================== - * System table encodings - *====================================================*/ -#define PLATFORM_CONFIG_MAGIC_NUM 0x3d4f5041 -#define PLATFORM_CONFIG_MAGIC_NUMBER_LEN 4 - -/* - * These power classes are the same as defined in SFF 8636 spec rev 2.4 - * describing byte 129 in table 6-16, except enumerated in a different order - */ -enum platform_config_qsfp_power_class_encoding { - QSFP_POWER_CLASS_1 = 1, - QSFP_POWER_CLASS_2, - QSFP_POWER_CLASS_3, - QSFP_POWER_CLASS_4, - QSFP_POWER_CLASS_5, - QSFP_POWER_CLASS_6, - QSFP_POWER_CLASS_7 -}; - - -/*===================================================== - * Port table encodings - *==================================================== */ -enum platform_config_port_type_encoding { - PORT_TYPE_RESERVED, - PORT_TYPE_DISCONNECTED, - PORT_TYPE_FIXED, - PORT_TYPE_VARIABLE, - PORT_TYPE_QSFP, - PORT_TYPE_MAX -}; - -enum platform_config_link_speed_supported_encoding { - LINK_SPEED_SUPP_12G = 1, - LINK_SPEED_SUPP_25G, - LINK_SPEED_SUPP_12G_25G, - LINK_SPEED_SUPP_MAX -}; - -/* - * This is a subset (not strict) of the link downgrades - * supported. The link downgrades supported are expected - * to be supplied to the driver by another entity such as - * the fabric manager - */ -enum platform_config_link_width_supported_encoding { - LINK_WIDTH_SUPP_1X = 1, - LINK_WIDTH_SUPP_2X, - LINK_WIDTH_SUPP_2X_1X, - LINK_WIDTH_SUPP_3X, - LINK_WIDTH_SUPP_3X_1X, - LINK_WIDTH_SUPP_3X_2X, - LINK_WIDTH_SUPP_3X_2X_1X, - LINK_WIDTH_SUPP_4X, - LINK_WIDTH_SUPP_4X_1X, - LINK_WIDTH_SUPP_4X_2X, - LINK_WIDTH_SUPP_4X_2X_1X, - LINK_WIDTH_SUPP_4X_3X, - LINK_WIDTH_SUPP_4X_3X_1X, - LINK_WIDTH_SUPP_4X_3X_2X, - LINK_WIDTH_SUPP_4X_3X_2X_1X, - LINK_WIDTH_SUPP_MAX -}; - -enum platform_config_virtual_lane_capability_encoding { - VL_CAP_VL0 = 1, - VL_CAP_VL0_1, - VL_CAP_VL0_2, - VL_CAP_VL0_3, - VL_CAP_VL0_4, - VL_CAP_VL0_5, - VL_CAP_VL0_6, - VL_CAP_VL0_7, - VL_CAP_VL0_8, - VL_CAP_VL0_9, - VL_CAP_VL0_10, - VL_CAP_VL0_11, - VL_CAP_VL0_12, - VL_CAP_VL0_13, - VL_CAP_VL0_14, - VL_CAP_MAX -}; - -/* Max MTU */ -enum platform_config_mtu_capability_encoding { - MTU_CAP_256 = 1, - MTU_CAP_512 = 2, - MTU_CAP_1024 = 3, - MTU_CAP_2048 = 4, - MTU_CAP_4096 = 5, - MTU_CAP_8192 = 6, - MTU_CAP_10240 = 7 -}; - -enum platform_config_local_max_timeout_encoding { - LOCAL_MAX_TIMEOUT_10_MS = 1, - LOCAL_MAX_TIMEOUT_100_MS, - LOCAL_MAX_TIMEOUT_1_S, - LOCAL_MAX_TIMEOUT_10_S, - LOCAL_MAX_TIMEOUT_100_S, - LOCAL_MAX_TIMEOUT_1000_S -}; - -#endif /*__PLATFORM_CONFIG_H*/ diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h index 3422250..b1b9e4a 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/staging/rdma/hfi1/qsfp.h @@ -213,7 +213,8 @@ struct qsfp_data { u8 cache[QSFP_MAX_NUM_PAGES*128]; spinlock_t qsfp_lock; u8 check_interrupt_flags; - u8 qsfp_interrupt_functional; + u8 reset_needed; + u8 limiting_active; u8 cache_valid; u8 cache_refresh_required; }; -- cgit v0.10.2 From 1d01cf33e38a6aff87c25575286385daac11b8ca Mon Sep 17 
00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:31:22 -0800 Subject: staging/rdma/hfi1: Get port type from configuration file The current code employs a heuristic to guess the port type. The canonical location to identify the port type of the designed platform is from the platform configuration data. This patch uses the previously fetched port type from the platform configuration and removes the now obsolete heuristic routine and its associated defines. Reviewed-by: Arthur Kepner Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 303dfee..5146f5d 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -503,16 +503,6 @@ void read_ltp_rtt(struct hfi1_devdata *dd) write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg); } -static u8 __opa_porttype(struct hfi1_pportdata *ppd) -{ - if (qsfp_mod_present(ppd)) { - if (ppd->qsfp_info.cache_valid) - return OPA_PORT_TYPE_STANDARD; - return OPA_PORT_TYPE_DISCONNECTED; - } - return OPA_PORT_TYPE_UNKNOWN; -} - static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) @@ -583,7 +573,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, if (start_of_sm_config && (state == IB_PORT_INIT)) ppd->is_sm_config_started = 1; - pi->port_phys_conf = __opa_porttype(ppd) & 0xf; + pi->port_phys_conf = (ppd->port_type & 0xf); #if PI_LED_ENABLE_SUP pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4; diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/staging/rdma/hfi1/opa_compat.h index f64eec1..30f7707 100644 --- a/drivers/staging/rdma/hfi1/opa_compat.h +++ b/drivers/staging/rdma/hfi1/opa_compat.h @@ -111,19 +111,4 @@ enum opa_port_phys_state { /* values 12-15 are reserved/ignored */ }; -/* OPA_PORT_TYPE_* definitions - these belong in opa_port_info.h */ -#define OPA_PORT_TYPE_UNKNOWN 0 -#define OPA_PORT_TYPE_DISCONNECTED 1 -/* port is not currently usable, CableInfo not available */ -#define OPA_PORT_TYPE_FIXED 2 -/* A fixed backplane port in a director class switch. All OPA ASICS */ -#define OPA_PORT_TYPE_VARIABLE 3 -/* A backplane port in a blade system, possibly mixed configuration */ -#define OPA_PORT_TYPE_STANDARD 4 -/* implies a SFF-8636 defined format for CableInfo (QSFP) */ -#define OPA_PORT_TYPE_SI_PHOTONICS 5 -/* A silicon photonics module implies TBD defined format for CableInfo - * as defined by Intel SFO group */ -/* 6 - 15 are reserved */ - #endif /* _LINUX_H */ -- cgit v0.10.2 From cbac386a120a7e8a26c013f496717b11cacbd99c Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:31:31 -0800 Subject: staging/rdma/hfi1: Support external device configuration requests from 8051 This patch implements support for turning on and off the clock data recovery mechanisms implemented in QSFP cable on request by the DC 8051 on a per-lane basis. Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 4d70a96..41af05e 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -6085,13 +6085,19 @@ static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data) } /* - * Handle requests from the 8051. + * Handle host requests from the 8051. + * + * This is a work-queue function outside of the interrupt. 
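 *
 * The request is deferred out of the 8051 interrupt handler as wired up
 * later in this patch:
 *
 *	INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
 *	...
 *	if (host_msg & EXT_DEVICE_CFG_REQ)
 *		queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work);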
*/ -static void handle_8051_request(struct hfi1_devdata *dd) +void handle_8051_request(struct work_struct *work) { + struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, + dc_host_req_work); + struct hfi1_devdata *dd = ppd->dd; u64 reg; - u16 data; - u8 type; + u16 data = 0; + u8 type, i, lanes, *cache = ppd->qsfp_info.cache; + u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS]; reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1); if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0) @@ -6112,12 +6118,46 @@ static void handle_8051_request(struct hfi1_devdata *dd) case HREQ_READ_CONFIG: case HREQ_SET_TX_EQ_ABS: case HREQ_SET_TX_EQ_REL: - case HREQ_ENABLE: dd_dev_info(dd, "8051 request: request 0x%x not supported\n", type); hreq_response(dd, HREQ_NOT_SUPPORTED, 0); break; + case HREQ_ENABLE: + lanes = data & 0xF; + for (i = 0; lanes; lanes >>= 1, i++) { + if (!(lanes & 1)) + continue; + if (data & 0x200) { + /* enable TX CDR */ + if (cache[QSFP_MOD_PWR_OFFS] & 0x8 && + cache[QSFP_CDR_INFO_OFFS] & 0x80) + cdr_ctrl_byte |= (1 << (i + 4)); + } else { + /* disable TX CDR */ + if (cache[QSFP_MOD_PWR_OFFS] & 0x8 && + cache[QSFP_CDR_INFO_OFFS] & 0x80) + cdr_ctrl_byte &= ~(1 << (i + 4)); + } + + if (data & 0x800) { + /* enable RX CDR */ + if (cache[QSFP_MOD_PWR_OFFS] & 0x4 && + cache[QSFP_CDR_INFO_OFFS] & 0x40) + cdr_ctrl_byte |= (1 << i); + } else { + /* disable RX CDR */ + if (cache[QSFP_MOD_PWR_OFFS] & 0x4 && + cache[QSFP_CDR_INFO_OFFS] & 0x40) + cdr_ctrl_byte &= ~(1 << i); + } + } + qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS, + &cdr_ctrl_byte, 1); + hreq_response(dd, HREQ_SUCCESS, data); + refresh_qsfp_cache(ppd, &ppd->qsfp_info); + break; + case HREQ_CONFIG_DONE: hreq_response(dd, HREQ_SUCCESS, 0); break; @@ -7373,7 +7413,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)LINKUP_ACHIEVED; } if (host_msg & EXT_DEVICE_CFG_REQ) { - handle_8051_request(dd); + queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work); host_msg &= ~(u64)EXT_DEVICE_CFG_REQ; } if (host_msg & VERIFY_CAP_FRAME) { diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 5e4fe43..0e95f0b 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -647,6 +647,7 @@ void handle_verify_cap(struct work_struct *work); void handle_freeze(struct work_struct *work); void handle_link_up(struct work_struct *work); void handle_link_down(struct work_struct *work); +void handle_8051_request(struct work_struct *work); void handle_link_downgrade(struct work_struct *work); void handle_link_bounce(struct work_struct *work); void handle_sma_message(struct work_struct *work); diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index f79b070..0fe630e 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -589,6 +589,7 @@ struct hfi1_pportdata { struct work_struct link_vc_work; struct work_struct link_up_work; struct work_struct link_down_work; + struct work_struct dc_host_req_work; struct work_struct sma_message_work; struct work_struct freeze_work; struct work_struct link_downgrade_work; diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 354935f..2851e90 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -495,6 +495,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->link_vc_work, handle_verify_cap); INIT_WORK(&ppd->link_up_work, handle_link_up); 
INIT_WORK(&ppd->link_down_work, handle_link_down); + INIT_WORK(&ppd->dc_host_req_work, handle_8051_request); INIT_WORK(&ppd->freeze_work, handle_freeze); INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); INIT_WORK(&ppd->sma_message_work, handle_sma_message); -- cgit v0.10.2 From 6b14e0ea48890e633ac69caa3ae13beccdd497be Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:31:40 -0800 Subject: staging/rdma/hfi1: Fix missing firmware NULL dereference The gen3 bump code must mark a firmware download failure as fatal. Otherwise a later load attempt will fail with a NULL dereference. Also: o Only do a firmware back-off for RTL. There are no alternates for FPGA or simulation. o Rearrange OS firmware request order to match what is actually loaded. This results in more coherent informational messages in the case of missing firmware. Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 28ae42f..d954e1a 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -593,27 +593,27 @@ retry: fw_pcie_serdes_name = ALT_FW_PCIE_NAME; } - if (fw_8051_load) { - err = obtain_one_firmware(dd, fw_8051_name, &fw_8051); + if (fw_sbus_load) { + err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus); if (err) goto done; } - if (fw_fabric_serdes_load) { - err = obtain_one_firmware(dd, fw_fabric_serdes_name, - &fw_fabric); + if (fw_pcie_serdes_load) { + err = obtain_one_firmware(dd, fw_pcie_serdes_name, &fw_pcie); if (err) goto done; } - if (fw_sbus_load) { - err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus); + if (fw_fabric_serdes_load) { + err = obtain_one_firmware(dd, fw_fabric_serdes_name, + &fw_fabric); if (err) goto done; } - if (fw_pcie_serdes_load) { - err = obtain_one_firmware(dd, fw_pcie_serdes_name, &fw_pcie); + if (fw_8051_load) { + err = obtain_one_firmware(dd, fw_8051_name, &fw_8051); if (err) goto done; } @@ -621,8 +621,8 @@ retry: done: if (err) { /* oops, had problems obtaining a firmware */ - if (fw_state == FW_EMPTY) { - /* retry with alternate */ + if (fw_state == FW_EMPTY && dd->icode == ICODE_RTL_SILICON) { + /* retry with alternate (RTL only) */ fw_state = FW_TRY; goto retry; } diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index b2f553d8..3cdc804 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -1032,8 +1032,11 @@ retry: /* step 4: download PCIe Gen3 SerDes firmware */ dd_dev_info(dd, "%s: downloading firmware\n", __func__); ret = load_pcie_firmware(dd); - if (ret) + if (ret) { + /* do not proceed if the firmware cannot be downloaded */ + return_error = 1; goto done; + } /* step 5: set up device parameter settings */ dd_dev_info(dd, "%s: setting PCIe registers\n", __func__); -- cgit v0.10.2 From 69a00b8e0508c8b98fba3b57a7c6c45b724553c3 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 3 Feb 2016 14:31:49 -0800 Subject: staging/rdma/hfi1: Fix per-VL transmit discard counts Implement per-VL transmit counters. Not all errors can be attributed to a particular VL, so make a best attempt. o Extend the egress error bits used to count toward transmit discard. o When an egress error or send error occur, try to map back to a VL. o Implement a SDMA engine to VL (back) map. 
o Add per-VL port transmit counters Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 41af05e..108cd48 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -510,6 +510,12 @@ static struct flag_table sdma_err_status_flags[] = { | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \ | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK) +/* SendEgressErrInfo bits that correspond to a PortXmitDiscard counter */ +#define PORT_DISCARD_EGRESS_ERRS \ + (SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK \ + | SEND_EGRESS_ERR_INFO_VL_MAPPING_ERR_SMASK \ + | SEND_EGRESS_ERR_INFO_VL_ERR_SMASK) + /* * TXE Egress Error flags */ @@ -1481,12 +1487,18 @@ static u64 access_sw_unknown_frame_cnt(const struct cntr_entry *entry, static u64 access_sw_xmit_discards(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = context; + struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + u64 zero = 0; + u64 *counter; - if (vl != CNTR_INVALID_VL) - return 0; + if (vl == CNTR_INVALID_VL) + counter = &ppd->port_xmit_discards; + else if (vl >= 0 && vl < C_VL_COUNT) + counter = &ppd->port_xmit_discards_vl[vl]; + else + counter = &zero; - return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data); + return read_write_sw(ppd->dd, counter, mode, data); } static u64 access_xmit_constraint_errs(const struct cntr_entry *entry, @@ -5508,12 +5520,14 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg) } } -static void count_port_inactive(struct hfi1_devdata *dd) +static inline void __count_port_discards(struct hfi1_pportdata *ppd) { - struct hfi1_pportdata *ppd = dd->pport; + incr_cntr64(&ppd->port_xmit_discards); +} - if (ppd->port_xmit_discards < ~(u64)0) - ppd->port_xmit_discards++; +static void count_port_inactive(struct hfi1_devdata *dd) +{ + __count_port_discards(dd->pport); } /* @@ -5525,7 +5539,8 @@ static void count_port_inactive(struct hfi1_devdata *dd) * egress error if more than one packet fails the same integrity check * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO. */ -static void handle_send_egress_err_info(struct hfi1_devdata *dd) +static void handle_send_egress_err_info(struct hfi1_devdata *dd, + int vl) { struct hfi1_pportdata *ppd = dd->pport; u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */ @@ -5540,10 +5555,24 @@ static void handle_send_egress_err_info(struct hfi1_devdata *dd) info, egress_err_info_string(buf, sizeof(buf), info), src); /* Eventually add other counters for each bit */ + if (info & PORT_DISCARD_EGRESS_ERRS) { + int weight, i; - if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) { - if (ppd->port_xmit_discards < ~(u64)0) - ppd->port_xmit_discards++; + /* + * Count all, in case multiple bits are set. Reminder: + * since there is only one info register for many sources, + * these may be attributed to the wrong VL if they occur + * too close together. + */ + weight = hweight64(info); + for (i = 0; i < weight; i++) { + __count_port_discards(ppd); + if (vl >= 0 && vl < TXE_NUM_DATA_VL) + incr_cntr64(&ppd->port_xmit_discards_vl[vl]); + else if (vl == 15) + incr_cntr64(&ppd->port_xmit_discards_vl + [C_VL_15]); + } } } @@ -5561,12 +5590,71 @@ static inline int port_inactive_err(u64 posn) * Input value is a bit position within the SEND_EGRESS_ERR_STATUS * register. 
Does it represent a 'disallowed packet' error? */ -static inline int disallowed_pkt_err(u64 posn) +static inline int disallowed_pkt_err(int posn) { return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) && posn <= SEES(TX_SDMA15_DISALLOWED_PACKET)); } +/* + * Input value is a bit position of one of the SDMA engine disallowed + * packet errors. Return which engine. Use of this must be guarded by + * disallowed_pkt_err(). + */ +static inline int disallowed_pkt_engine(int posn) +{ + return posn - SEES(TX_SDMA0_DISALLOWED_PACKET); +} + +/* + * Translate an SDMA engine to a VL. Return -1 if the tranlation cannot + * be done. + */ +static int engine_to_vl(struct hfi1_devdata *dd, int engine) +{ + struct sdma_vl_map *m; + int vl; + + /* range check */ + if (engine < 0 || engine >= TXE_NUM_SDMA_ENGINES) + return -1; + + rcu_read_lock(); + m = rcu_dereference(dd->sdma_map); + vl = m->engine_to_vl[engine]; + rcu_read_unlock(); + + return vl; +} + +/* + * Translate the send context (sofware index) into a VL. Return -1 if the + * translation cannot be done. + */ +static int sc_to_vl(struct hfi1_devdata *dd, int sw_index) +{ + struct send_context_info *sci; + struct send_context *sc; + int i; + + sci = &dd->send_contexts[sw_index]; + + /* there is no information for user (PSM) and ack contexts */ + if (sci->type != SC_KERNEL) + return -1; + + sc = sci->sc; + if (!sc) + return -1; + if (dd->vld[15].sc == sc) + return 15; + for (i = 0; i < num_vls; i++) + if (dd->vld[i].sc == sc) + return i; + + return -1; +} + static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { u64 reg_copy = reg, handled = 0; @@ -5575,27 +5663,27 @@ static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg) if (reg & ALL_TXE_EGRESS_FREEZE_ERR) start_freeze_handling(dd->pport, 0); - if (is_ax(dd) && (reg & - SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) - && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) + else if (is_ax(dd) && + (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) && + (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) start_freeze_handling(dd->pport, 0); while (reg_copy) { int posn = fls64(reg_copy); - /* - * fls64() returns a 1-based offset, but we generally - * want 0-based offsets. 
- */ + /* fls64() returns a 1-based offset, we want it zero based */ int shift = posn - 1; + u64 mask = 1ULL << shift; if (port_inactive_err(shift)) { count_port_inactive(dd); - handled |= (1ULL << shift); + handled |= mask; } else if (disallowed_pkt_err(shift)) { - handle_send_egress_err_info(dd); - handled |= (1ULL << shift); + int vl = engine_to_vl(dd, disallowed_pkt_engine(shift)); + + handle_send_egress_err_info(dd, vl); + handled |= mask; } - clear_bit(shift, (unsigned long *)®_copy); + reg_copy &= ~mask; } reg &= ~handled; @@ -5739,7 +5827,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, send_context_err_status_string(flags, sizeof(flags), status)); if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK) - handle_send_egress_err_info(dd); + handle_send_egress_err_info(dd, sc_to_vl(dd, sw_index)); /* * Automatically restart halted kernel contexts out of interrupt diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 0fe630e..76c369a 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -709,8 +709,9 @@ struct hfi1_pportdata { u64 *cntrs; /* port relative synthetic counter buffer */ u64 *scntrs; - /* we synthesize port_xmit_discards from several egress errors */ + /* port_xmit_discards are synthesized from different egress errors */ u64 port_xmit_discards; + u64 port_xmit_discards_vl[C_VL_COUNT]; u64 port_xmit_constraint_errors; u64 port_rcv_constraint_errors; /* count of 'link_err' interrupts from DC */ diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 4eb55fa..ddaaaac 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -890,6 +890,9 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines) newmap->actual_vls = num_vls; newmap->vls = roundup_pow_of_two(num_vls); newmap->mask = (1 << ilog2(newmap->vls)) - 1; + /* initialize back-map */ + for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++) + newmap->engine_to_vl[i] = -1; for (i = 0; i < newmap->vls; i++) { /* save for wrap around */ int first_engine = engine; @@ -913,6 +916,9 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines) /* wrap back to first engine */ engine = first_engine; } + /* assign back-map */ + for (j = 0; j < vl_engines[i]; j++) + newmap->engine_to_vl[first_engine + j] = i; } else { /* just re-use entry without allocating */ newmap->map[i] = newmap->map[i % num_vls]; diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 0f51c45..1d52d6e 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -1087,6 +1087,7 @@ struct sdma_map_elem { /** * struct sdma_map_el - mapping for a vl + * @engine_to_vl - map of an engine to a vl * @list - rcu head for free callback * @mask - vl mask to "mod" the vl to produce an index to map array * @actual_vls - number of vls @@ -1098,6 +1099,7 @@ struct sdma_map_elem { * in turn point to an array of sde's for that vl. 
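 *
 * The engine_to_vl[] back-map is initialized to -1 and filled in by
 * sdma_map_init(); readers resolve an engine to a VL under RCU, as in the
 * chip.c hunk earlier in this patch:
 *
 *	rcu_read_lock();
 *	m = rcu_dereference(dd->sdma_map);
 *	vl = m->engine_to_vl[engine];
 *	rcu_read_unlock();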
*/ struct sdma_vl_map { + s8 engine_to_vl[TXE_NUM_SDMA_ENGINES]; struct rcu_head list; u32 mask; u8 actual_vls; -- cgit v0.10.2 From 845f876d0819e2b941b1dbe92e0d219df8010035 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:31:57 -0800 Subject: staging/rdma/hfi1: Only warn when board description is not found Change-Id: Icc4ad27c4c67e51df8c8a203c4f16973793678ec Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 108cd48..81b48d6 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13941,7 +13941,7 @@ static int obtain_boardname(struct hfi1_devdata *dd) ret = read_hfi1_efi_var(dd, "description", &size, (void **)&dd->boardname); if (ret) { - dd_dev_err(dd, "Board description not found\n"); + dd_dev_info(dd, "Board description not found\n"); /* use generic description */ dd->boardname = kstrdup(generic, GFP_KERNEL); if (!dd->boardname) -- cgit v0.10.2 From fe072e205a685cfba285259d779dd3bda5e250f2 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:32:06 -0800 Subject: staging/rdma/hfi1: Make firmware failure messages warnings Make firmware validation failure and missing firmware messages a warning since alternates can be tried. Add an error message when all attempts fail. Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index d954e1a..817cbf9 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -433,8 +433,8 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name, ret = request_firmware(&fdet->fw, name, &dd->pcidev->dev); if (ret) { - dd_dev_err(dd, "cannot find firmware \"%s\", err %d\n", - name, ret); + dd_dev_warn(dd, "cannot find firmware \"%s\", err %d\n", + name, ret); return ret; } @@ -572,7 +572,7 @@ retry: * We tried the original and it failed. Move to the * alternate. */ - dd_dev_info(dd, "using alternate firmware names\n"); + dd_dev_warn(dd, "using alternate firmware names\n"); /* * Let others run. Some systems, when missing firmware, does * something that holds for 30 seconds. If we do that twice @@ -626,6 +626,7 @@ done: fw_state = FW_TRY; goto retry; } + dd_dev_err(dd, "unable to obtain working firmware\n"); fw_state = FW_ERR; fw_err = -ENOENT; } else { @@ -896,16 +897,17 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who, MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK | MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK); /* - * All that is left are the current errors. Print failure details, - * if any. + * All that is left are the current errors. Print warnings on + * authorization failure details, if any. Firmware authorization + * can be retried, so these are only warnings. 
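 * The check below reads MISC_ERR_STATUS and reports the FW_AUTH_FAILED and
 * KEY_MISMATCH bits with dd_dev_warn() rather than dd_dev_err(), since the
 * load may still succeed once the alternate firmware names are tried.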
*/ reg = read_csr(dd, MISC_ERR_STATUS); if (ret) { if (reg & MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK) - dd_dev_err(dd, "%s firmware authorization failed\n", - who); + dd_dev_warn(dd, "%s firmware authorization failed\n", + who); if (reg & MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK) - dd_dev_err(dd, "%s firmware key mismatch\n", who); + dd_dev_warn(dd, "%s firmware key mismatch\n", who); } return ret; -- cgit v0.10.2 From 715c430ca5b21c22648665045fc3718b4547d5c8 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:32:14 -0800 Subject: staging/rdma/hfi1: Don't attempt to qualify or tune loopback plugs Loopback plugs used for testing hardware don't need to be qualified to bring the link up unlike production cables. This patch adds an exception for loopback plugs to the QSFP and SerDes tuning algortihm. Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index 9aa5e06..c3df1d8 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -737,7 +737,8 @@ void tune_serdes(struct hfi1_pportdata *ppd) ppd->driver_link_ready = 0; ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); - if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB || + /* Skip the tuning for testing (loopback != none) and simulations */ + if (loopback != LOOPBACK_NONE || ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR || !dd->pcfg_cache.cache_valid) { ppd->driver_link_ready = 1; -- cgit v0.10.2 From dcc68e528238c9e1173a8e98de29f0149122ac4c Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:32:23 -0800 Subject: staging/rdma/hfi1: No firmware retry for simulation Simulation has no firmware, so it will never move firmware acquire to the FINAL state. Avoid that by skiping the TRY state and moving directly to FINAL. Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 817cbf9..4ba524b 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -631,7 +631,8 @@ done: fw_err = -ENOENT; } else { /* success */ - if (fw_state == FW_EMPTY) + if (fw_state == FW_EMPTY && + dd->icode != ICODE_FUNCTIONAL_SIMULATOR) fw_state = FW_TRY; /* may retry later */ else fw_state = FW_FINAL; /* cannot try again */ -- cgit v0.10.2 From a59329d5e8977fb4b6d7535bdf14b0fe7ece5559 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:32:31 -0800 Subject: staging/rdma/hfi1: Skip lcb init for simulation The simulator does not correctly handle LCB cclk loopback. Skip that step for simulation - it is not needed. 
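
The simulation carve-outs in this patch and the previous one rely on the same
dd->icode check; a minimal sketch of the guard pattern used in the hunks below
(illustrative only, not an additional change):

	/* Hardware-only step: nothing to do under the functional simulator. */
	if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
		return;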
Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 81b48d6..d45e271 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -9315,6 +9315,10 @@ static void init_qsfp_int(struct hfi1_devdata *dd) */ static void init_lcb(struct hfi1_devdata *dd) { + /* simulator does not correctly handle LCB cclk loopback, skip */ + if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) + return; + /* the DC has been reset earlier in the driver load */ /* set LCB for cclk loopback on the port */ -- cgit v0.10.2 From 11d2b114cdebd9b520de573d74c70fb04c2771cc Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 3 Feb 2016 14:32:40 -0800 Subject: staging/rdma/hfi1: Fix for 32-bit counter overflow in driver and hfi1stats When 32-bit hardware counters overflow, hfi1stats misinterprets the counters as being 64 bits causing the deltas for the counters to be a huge number. This patch makes hfi1stats aware that a counter is 32 bits by making the driver write ,32 to debugfs. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index d45e271..a90e6e6 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -11766,6 +11766,8 @@ static int init_cntrs(struct hfi1_devdata *dd) char *p; char name[C_MAX_NAME]; struct hfi1_pportdata *ppd; + const char *bit_type_32 = ",32"; + const int bit_type_32_sz = strlen(bit_type_32); /* set up the stats timer; the add_timer is done at the end */ setup_timer(&dd->synth_stats_timer, update_synth_timer, @@ -11795,6 +11797,9 @@ static int init_cntrs(struct hfi1_devdata *dd) dev_cntrs[i].name, vl_from_idx(j)); sz += strlen(name); + /* Add ",32" for 32-bit counters */ + if (dev_cntrs[i].flags & CNTR_32BIT) + sz += bit_type_32_sz; sz++; hfi1_dbg_early("\t\t%s\n", name); dd->ndevcntrs++; @@ -11809,13 +11814,19 @@ static int init_cntrs(struct hfi1_devdata *dd) snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); sz += strlen(name); + /* Add ",32" for 32-bit counters */ + if (dev_cntrs[i].flags & CNTR_32BIT) + sz += bit_type_32_sz; sz++; hfi1_dbg_early("\t\t%s\n", name); dd->ndevcntrs++; } } else { - /* +1 for newline */ + /* +1 for newline. 
*/ sz += strlen(dev_cntrs[i].name) + 1; + /* Add ",32" for 32-bit counters */ + if (dev_cntrs[i].flags & CNTR_32BIT) + sz += bit_type_32_sz; dev_cntrs[i].offset = dd->ndevcntrs; dd->ndevcntrs++; hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name); @@ -11842,33 +11853,50 @@ static int init_cntrs(struct hfi1_devdata *dd) for (p = dd->cntrnames, i = 0; i < DEV_CNTR_LAST; i++) { if (dev_cntrs[i].flags & CNTR_DISABLED) { /* Nothing */ - } else { - if (dev_cntrs[i].flags & CNTR_VL) { - for (j = 0; j < C_VL_COUNT; j++) { - memset(name, '\0', C_MAX_NAME); - snprintf(name, C_MAX_NAME, "%s%d", - dev_cntrs[i].name, - vl_from_idx(j)); - memcpy(p, name, strlen(name)); - p += strlen(name); - *p++ = '\n'; + } else if (dev_cntrs[i].flags & CNTR_VL) { + for (j = 0; j < C_VL_COUNT; j++) { + memset(name, '\0', C_MAX_NAME); + snprintf(name, C_MAX_NAME, "%s%d", + dev_cntrs[i].name, + vl_from_idx(j)); + memcpy(p, name, strlen(name)); + p += strlen(name); + + /* Counter is 32 bits */ + if (dev_cntrs[i].flags & CNTR_32BIT) { + memcpy(p, bit_type_32, bit_type_32_sz); + p += bit_type_32_sz; } - } else if (dev_cntrs[i].flags & CNTR_SDMA) { - for (j = 0; j < TXE_NUM_SDMA_ENGINES; - j++) { - memset(name, '\0', C_MAX_NAME); - snprintf(name, C_MAX_NAME, "%s%d", - dev_cntrs[i].name, j); - memcpy(p, name, strlen(name)); - p += strlen(name); - *p++ = '\n'; + + *p++ = '\n'; + } + } else if (dev_cntrs[i].flags & CNTR_SDMA) { + for (j = 0; j < dd->chip_sdma_engines; j++) { + memset(name, '\0', C_MAX_NAME); + snprintf(name, C_MAX_NAME, "%s%d", + dev_cntrs[i].name, j); + memcpy(p, name, strlen(name)); + p += strlen(name); + + /* Counter is 32 bits */ + if (dev_cntrs[i].flags & CNTR_32BIT) { + memcpy(p, bit_type_32, bit_type_32_sz); + p += bit_type_32_sz; } - } else { - memcpy(p, dev_cntrs[i].name, - strlen(dev_cntrs[i].name)); - p += strlen(dev_cntrs[i].name); + *p++ = '\n'; } + } else { + memcpy(p, dev_cntrs[i].name, strlen(dev_cntrs[i].name)); + p += strlen(dev_cntrs[i].name); + + /* Counter is 32 bits */ + if (dev_cntrs[i].flags & CNTR_32BIT) { + memcpy(p, bit_type_32, bit_type_32_sz); + p += bit_type_32_sz; + } + + *p++ = '\n'; } } @@ -11906,13 +11934,19 @@ static int init_cntrs(struct hfi1_devdata *dd) port_cntrs[i].name, vl_from_idx(j)); sz += strlen(name); + /* Add ",32" for 32-bit counters */ + if (port_cntrs[i].flags & CNTR_32BIT) + sz += bit_type_32_sz; sz++; hfi1_dbg_early("\t\t%s\n", name); dd->nportcntrs++; } } else { - /* +1 for newline */ + /* +1 for newline */ sz += strlen(port_cntrs[i].name) + 1; + /* Add ",32" for 32-bit counters */ + if (port_cntrs[i].flags & CNTR_32BIT) + sz += bit_type_32_sz; port_cntrs[i].offset = dd->nportcntrs; dd->nportcntrs++; hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name); @@ -11938,12 +11972,26 @@ static int init_cntrs(struct hfi1_devdata *dd) vl_from_idx(j)); memcpy(p, name, strlen(name)); p += strlen(name); + + /* Counter is 32 bits */ + if (port_cntrs[i].flags & CNTR_32BIT) { + memcpy(p, bit_type_32, bit_type_32_sz); + p += bit_type_32_sz; + } + *p++ = '\n'; } } else { memcpy(p, port_cntrs[i].name, strlen(port_cntrs[i].name)); p += strlen(port_cntrs[i].name); + + /* Counter is 32 bits */ + if (port_cntrs[i].flags & CNTR_32BIT) { + memcpy(p, bit_type_32, bit_type_32_sz); + p += bit_type_32_sz; + } + *p++ = '\n'; } } -- cgit v0.10.2 From 566c157cbd2113a18bfc40170de16227357434d7 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:32:49 -0800 Subject: staging/rdma/hfi1: Correctly set RcvCtxtCtrl register The RcvCtxtCtrl register was being incorrectly set upon 
context initialization and clean up resulting, in many cases, of contexts using settings from previous contexts' initialization. This resulted in bad and unexpected behavior. This was especially important for the TailUpd bit, which requires special handling and if set incorrectly could lead to severely degraded performance. This patch fixes the handling of the RcvCtxtCtrl register, ensuring that each context gets initialized with settings applicable only for that context. It also ensures the proper setting for the TailUpd bit by setting it to either 0 or 1 (as needed by the context's configuration) explicitly. Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index a90e6e6..d10ba673 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -6684,11 +6684,17 @@ static void rxe_freeze(struct hfi1_devdata *dd) */ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) { + u32 rcvmask; int i; /* enable all kernel contexts */ - for (i = 0; i < dd->n_krcv_queues; i++) - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i); + for (i = 0; i < dd->n_krcv_queues; i++) { + rcvmask = HFI1_RCVCTRL_CTXT_ENB; + /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ + rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? + HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; + hfi1_rcvctrl(dd, rcvmask, i); + } /* enable port */ add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); @@ -11255,6 +11261,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) if (dd->rcvhdrtail_dummy_physaddr) { write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, dd->rcvhdrtail_dummy_physaddr); + /* Enabling RcvCtxtCtrl.TailUpd is intentional. */ rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; } @@ -11266,8 +11273,11 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; - if (op & HFI1_RCVCTRL_TAILUPD_DIS) - rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK; + if (op & HFI1_RCVCTRL_TAILUPD_DIS) { + /* See comment on RcvCtxtCtrl.TailUpd above */ + if (!(op & HFI1_RCVCTRL_CTXT_DIS)) + rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK; + } if (op & HFI1_RCVCTRL_TIDFLOW_ENB) rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK; if (op & HFI1_RCVCTRL_TIDFLOW_DIS) diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index d365889..5c694fa 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -771,6 +771,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_TIDFLOW_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); @@ -1156,8 +1157,16 @@ static int user_init(struct file *fp) rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; + /* + * The RcvCtxtCtrl.TailUpd bit has to be explicitly written. + * We can't rely on the correct value to be set from prior + * uses of the chip or ctxt. Therefore, add the rcvctrl op + * for both cases. 
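	 * Note that the hfi1_rcvctrl() hunk earlier in this patch only clears
	 * RcvCtxtCtrl.TailUpd on a TAILUPD_DIS request when the context is
	 * not also being disabled; on context disable the dummy tail address
	 * path keeps the bit set intentionally.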
+ */ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; + else + rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); /* Notify any waiting slaves */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 2851e90..fc3d40a 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -589,8 +589,8 @@ static void enable_chip(struct hfi1_devdata *dd) * Enable kernel ctxts' receive and receive interrupt. * Other ctxts done as user opens and initializes them. */ - rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; for (i = 0; i < dd->first_user_ctxt; ++i) { + rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR)) -- cgit v0.10.2 From 6c9e50f894458810591f7883bdd6f0f8474b06a0 Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Wed, 3 Feb 2016 14:32:57 -0800 Subject: staging/rdma/hfi1: Method to toggle "fast ECN" detection Add a per port sysfs paramter to toggle cc_prescan/Fast ECN Detection and remove the Kconfig option which was previously used to control this. While am updating the sysfs documentation, fix the name of CCMgtA. Reviewed-by: Arthur Kepner Reviewed-by: Mike Marciniszyn Signed-off-by: Vennila Megavannan Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt index 9028b02..3ecf0c3 100644 --- a/Documentation/infiniband/sysfs.txt +++ b/Documentation/infiniband/sysfs.txt @@ -78,9 +78,10 @@ HFI1 chip_reset - diagnostic (root only) boardversion - board version ports/1/ - CMgtA/ + CCMgtA/ cc_settings_bin - CCA tables used by PSM2 cc_table_bin + cc_prescan - enable prescaning for faster BECN response sc2v/ - 32 files (0 - 31) used to translate sl->vl sl2sc/ - 32 files (0 - 31) used to translate sl->sc vl2mtu/ - 16 (0 - 15) files used to determine MTU for vl diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/staging/rdma/hfi1/Kconfig index 846c240c..3e668d8 100644 --- a/drivers/staging/rdma/hfi1/Kconfig +++ b/drivers/staging/rdma/hfi1/Kconfig @@ -26,13 +26,3 @@ config SDMA_VERBOSITY ---help--- This is a configuration flag to enable verbose SDMA debug -config PRESCAN_RXQ - bool "Enable prescanning of the RX queue for ECNs" - depends on INFINIBAND_HFI1 - default n - ---help--- - This option toggles the prescanning of the receive queue for - Explicit Congestion Notifications. If an ECN is detected, it - is processed as quickly as possible, the ECN is toggled off. - After the prescanning step, the receive queue is processed as - usual. diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index d848cc0..59ce85f 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -453,11 +453,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->rcv_flags = 0; } -#ifndef CONFIG_PRESCAN_RXQ -static void prescan_rxq(struct hfi1_packet *packet) {} -#else /* !CONFIG_PRESCAN_RXQ */ -static int prescan_receive_queue; - static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, struct hfi1_other_headers *ohdr, u64 rhf, u32 bth1, struct ib_grh *grh) @@ -581,15 +576,19 @@ static inline void update_ps_mdata(struct ps_mdata *mdata, * containing Excplicit Congestion Notifications (FECNs, or BECNs). 
* When an ECN is found, process the Congestion Notification, and toggle * it off. + * This is declared as a macro to allow quick checking of the port to avoid + * the overhead of a function call if not enabled. */ -static void prescan_rxq(struct hfi1_packet *packet) +#define prescan_rxq(rcd, packet) \ + do { \ + if (rcd->ppd->cc_prescan) \ + __prescan_rxq(packet); \ + } while (0) +static void __prescan_rxq(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; struct ps_mdata mdata; - if (!prescan_receive_queue) - return; - init_ps_mdata(&mdata, packet); while (1) { @@ -653,7 +652,6 @@ next: update_ps_mdata(&mdata, rcd); } } -#endif /* CONFIG_PRESCAN_RXQ */ static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { @@ -819,7 +817,7 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread) goto bail; } - prescan_rxq(&packet); + prescan_rxq(rcd, &packet); while (last == RCV_PKT_OK) { last = process_rcv_packet(&packet, thread); @@ -850,7 +848,7 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread) } smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ - prescan_rxq(&packet); + prescan_rxq(rcd, &packet); while (last == RCV_PKT_OK) { last = process_rcv_packet(&packet, thread); @@ -961,7 +959,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) } } - prescan_rxq(&packet); + prescan_rxq(rcd, &packet); while (last == RCV_PKT_OK) { diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 76c369a..d19d6b7 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -734,6 +734,8 @@ struct hfi1_pportdata { /* Error events that will cause a port bounce. */ u32 port_error_action; struct work_struct linkstate_active_work; + /* Does this port need to prescan for FECNs */ + bool cc_prescan; }; typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index d05b9f3..f1d47e7 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -99,10 +99,6 @@ static void port_release(struct kobject *kobj) /* nothing to do since memory is freed by hfi1_free_devdata() */ } -static struct kobj_type port_cc_ktype = { - .release = port_release, -}; - static struct bin_attribute cc_table_bin_attr = { .attr = {.name = "cc_table_bin", .mode = 0444}, .read = read_cc_table_bin, @@ -151,6 +147,68 @@ static struct bin_attribute cc_setting_bin_attr = { .size = PAGE_SIZE, }; +struct hfi1_port_attr { + struct attribute attr; + ssize_t (*show)(struct hfi1_pportdata *, char *); + ssize_t (*store)(struct hfi1_pportdata *, const char *, size_t); +}; + +static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf) +{ + return sprintf(buf, "%s\n", ppd->cc_prescan ? 
"on" : "off"); +} + +static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf, + size_t count) +{ + if (!memcmp(buf, "on", 2)) + ppd->cc_prescan = true; + else if (!memcmp(buf, "off", 3)) + ppd->cc_prescan = false; + + return count; +} + +static struct hfi1_port_attr cc_prescan_attr = + __ATTR(cc_prescan, 0600, cc_prescan_show, cc_prescan_store); + +static ssize_t cc_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct hfi1_port_attr *port_attr = + container_of(attr, struct hfi1_port_attr, attr); + struct hfi1_pportdata *ppd = + container_of(kobj, struct hfi1_pportdata, pport_cc_kobj); + + return port_attr->show(ppd, buf); +} + +static ssize_t cc_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct hfi1_port_attr *port_attr = + container_of(attr, struct hfi1_port_attr, attr); + struct hfi1_pportdata *ppd = + container_of(kobj, struct hfi1_pportdata, pport_cc_kobj); + + return port_attr->store(ppd, buf, count); +} + +static const struct sysfs_ops port_cc_sysfs_ops = { + .show = cc_attr_show, + .store = cc_attr_store +}; + +static struct attribute *port_cc_default_attributes[] = { + &cc_prescan_attr.attr +}; + +static struct kobj_type port_cc_ktype = { + .release = port_release, + .sysfs_ops = &port_cc_sysfs_ops, + .default_attrs = port_cc_default_attributes +}; + /* Start sc2vl */ #define HFI1_SC2VL_ATTR(N) \ static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \ -- cgit v0.10.2 From affa48de8417ccdde467b075577f6e5154ff9943 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 3 Feb 2016 14:33:06 -0800 Subject: staging/rdma/hfi1: Add support for enabling/disabling PCIe ASPM hfi1 HW has a high PCIe ASPM L1 exit latency and also advertises an acceptable latency less than actual ASPM latencies. Additional mechanisms than those provided by BIOS/OS are therefore required to enable/disable ASPM for hfi1 to provide acceptable power/performance trade offs. This patch adds this support. By means of a module parameter ASPM can be either (a) always enabled (power save mode) (b) always disabled (performance mode) (c) enabled/disabled dynamically. The dynamic mode implements two heuristics to alleviate possible problems with high ASPM L1 exit latency. ASPM is normally enabled but is disabled if (a) there are any active user space PSM contexts, or (b) for verbs, ASPM is disabled as interrupt activity for a context starts to increase. A few more points about the verbs implementation. In order to reduce lock/cache contention between multiple verbs contexts, some processing is done at the context layer before contending for device layer locks. ASPM is disabled when two interrupts for a context happen within 1 millisec. A timer is scheduled which will re-enable ASPM after 1 second should the interrupt activity cease. Normally, every interrupt, or interrupt-pair should push the timer out further. However, since this might increase the processing load per interrupt, pushing the timer out is postponed for half a second. If after half a second we get two interrupts within 1 millisec the timer is pushed out by another second. Finally, the kernel ASPM API is not used in this patch. This is because this patch does several non-standard things as SW workarounds for HW issues. As mentioned above, it enables ASPM even when advertised actual latencies are greater than acceptable latencies. 
Also, whereas the kernel API only allows drivers to disable ASPM from driver probe, this patch enables/disables ASPM directly from interrupt context. Due to these reasons the kernel ASPM API was not used. Reviewed-by: Mike Marciniszyn Reviewed-by: Dean Luick Reviewed-by: Ira Weiny Signed-off-by: Ashutosh Dixit Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/staging/rdma/hfi1/aspm.h new file mode 100644 index 0000000..67fce1d --- /dev/null +++ b/drivers/staging/rdma/hfi1/aspm.h @@ -0,0 +1,297 @@ +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#ifndef _ASPM_H +#define _ASPM_H + +#include "hfi.h" + +extern uint aspm_mode; + +enum aspm_mode { + ASPM_MODE_DISABLED = 0, /* ASPM always disabled, performance mode */ + ASPM_MODE_ENABLED = 1, /* ASPM always enabled, power saving mode */ + ASPM_MODE_DYNAMIC = 2, /* ASPM enabled/disabled dynamically */ +}; + +/* Time after which the timer interrupt will re-enable ASPM */ +#define ASPM_TIMER_MS 1000 +/* Time for which interrupts are ignored after a timer has been scheduled */ +#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2) +/* Two interrupts within this time trigger ASPM disable */ +#define ASPM_TRIGGER_MS 1 +#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull) +#define ASPM_L1_SUPPORTED(reg) \ + (((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2) + +static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + u32 up, dn; + + pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn); + dn = ASPM_L1_SUPPORTED(dn); + + pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up); + up = ASPM_L1_SUPPORTED(up); + + /* ASPM works on A-step but is reported as not supported */ + return (!!dn || is_ax(dd)) && !!up; +} + +/* Set L1 entrance latency for slower entry to L1 */ +static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd) +{ + u32 l1_ent_lat = 0x4u; + u32 reg32; + + pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, ®32); + reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK; + reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT; + pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32); +} + +static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + + /* Enable ASPM L1 first in upstream component and then downstream */ + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, + PCI_EXP_LNKCTL_ASPM_L1); + pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, + PCI_EXP_LNKCTL_ASPM_L1); +} + +static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + + /* Disable ASPM L1 first in downstream component and then upstream */ + pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0x0); + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0x0); +} + +static inline void aspm_enable(struct hfi1_devdata *dd) +{ + if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED || + !dd->aspm_supported) + return; + + aspm_hw_enable_l1(dd); + dd->aspm_enabled = true; +} + +static inline void aspm_disable(struct hfi1_devdata *dd) +{ + if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED) + return; + + aspm_hw_disable_l1(dd); + dd->aspm_enabled = false; +} + +static inline void aspm_disable_inc(struct hfi1_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->aspm_lock, flags); + aspm_disable(dd); + atomic_inc(&dd->aspm_disabled_cnt); + spin_unlock_irqrestore(&dd->aspm_lock, flags); +} + +static inline void aspm_enable_dec(struct hfi1_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->aspm_lock, flags); + if (atomic_dec_and_test(&dd->aspm_disabled_cnt)) + aspm_enable(dd); + spin_unlock_irqrestore(&dd->aspm_lock, flags); +} + +/* ASPM processing for each receive context interrupt */ +static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd) +{ + bool restart_timer; + bool close_interrupts; + unsigned long flags; + ktime_t now, prev; + + /* Quickest 
exit for minimum impact */ + if (!rcd->aspm_intr_supported) + return; + + spin_lock_irqsave(&rcd->aspm_lock, flags); + /* PSM contexts are open */ + if (!rcd->aspm_intr_enable) + goto unlock; + + prev = rcd->aspm_ts_last_intr; + now = ktime_get(); + rcd->aspm_ts_last_intr = now; + + /* An interrupt pair close together in time */ + close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS; + + /* Don't push out our timer till this much time has elapsed */ + restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) > + ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC; + restart_timer = restart_timer && close_interrupts; + + /* Disable ASPM and schedule timer */ + if (rcd->aspm_enabled && close_interrupts) { + aspm_disable_inc(rcd->dd); + rcd->aspm_enabled = false; + restart_timer = true; + } + + if (restart_timer) { + mod_timer(&rcd->aspm_timer, + jiffies + msecs_to_jiffies(ASPM_TIMER_MS)); + rcd->aspm_ts_timer_sched = now; + } +unlock: + spin_unlock_irqrestore(&rcd->aspm_lock, flags); +} + +/* Timer function for re-enabling ASPM in the absence of interrupt activity */ +static inline void aspm_ctx_timer_function(unsigned long data) +{ + struct hfi1_ctxtdata *rcd = (struct hfi1_ctxtdata *)data; + unsigned long flags; + + spin_lock_irqsave(&rcd->aspm_lock, flags); + aspm_enable_dec(rcd->dd); + rcd->aspm_enabled = true; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); +} + +/* Disable interrupt processing for verbs contexts when PSM contexts are open */ +static inline void aspm_disable_all(struct hfi1_devdata *dd) +{ + struct hfi1_ctxtdata *rcd; + unsigned long flags; + unsigned i; + + for (i = 0; i < dd->first_user_ctxt; i++) { + rcd = dd->rcd[i]; + del_timer_sync(&rcd->aspm_timer); + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = false; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + } + + aspm_disable(dd); + atomic_set(&dd->aspm_disabled_cnt, 0); +} + +/* Re-enable interrupt processing for verbs contexts */ +static inline void aspm_enable_all(struct hfi1_devdata *dd) +{ + struct hfi1_ctxtdata *rcd; + unsigned long flags; + unsigned i; + + aspm_enable(dd); + + if (aspm_mode != ASPM_MODE_DYNAMIC) + return; + + for (i = 0; i < dd->first_user_ctxt; i++) { + rcd = dd->rcd[i]; + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = true; + rcd->aspm_enabled = true; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + } +} + +static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) +{ + spin_lock_init(&rcd->aspm_lock); + setup_timer(&rcd->aspm_timer, aspm_ctx_timer_function, + (unsigned long)rcd); + rcd->aspm_intr_supported = rcd->dd->aspm_supported && + aspm_mode == ASPM_MODE_DYNAMIC && + rcd->ctxt < rcd->dd->first_user_ctxt; +} + +static inline void aspm_init(struct hfi1_devdata *dd) +{ + unsigned i; + + spin_lock_init(&dd->aspm_lock); + dd->aspm_supported = aspm_hw_l1_supported(dd); + + for (i = 0; i < dd->first_user_ctxt; i++) + aspm_ctx_init(dd->rcd[i]); + + /* Start with ASPM disabled */ + aspm_hw_set_l1_ent_latency(dd); + dd->aspm_enabled = false; + aspm_hw_disable_l1(dd); + + /* Now turn on ASPM if configured */ + aspm_enable_all(dd); +} + +static inline void aspm_exit(struct hfi1_devdata *dd) +{ + aspm_disable_all(dd); + + /* Turn on ASPM on exit to conserve power */ + aspm_enable(dd); +} + +#endif /* _ASPM_H */ diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index d10ba673..3577042 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -65,6 +65,7 @@ #include 
"eprom.h" #include "efivar.h" #include "platform.h" +#include "aspm.h" #define NUM_IB_PORTS 1 @@ -8069,6 +8070,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data) trace_hfi1_receive_interrupt(dd, rcd->ctxt); this_cpu_inc(*dd->int_counter); + aspm_ctx_disable(rcd); /* receive interrupt remains blocked while processing packets */ disposition = rcd->do_interrupt(rcd, 0); @@ -12792,6 +12794,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd->num_rcv_contexts = total_contexts; dd->n_krcv_queues = num_kernel_contexts; dd->first_user_ctxt = num_kernel_contexts; + dd->num_user_contexts = num_user_contexts; dd->freectxts = num_user_contexts; dd_dev_info(dd, "rcv contexts: chip %d, used %d (kernel %d, user %d)\n", @@ -13948,6 +13951,7 @@ done: */ void hfi1_start_cleanup(struct hfi1_devdata *dd) { + aspm_exit(dd); free_cntrs(dd); free_rcverr(dd); clean_up_interrupts(dd); diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h index 3cd3352..23898eb 100644 --- a/drivers/staging/rdma/hfi1/chip_registers.h +++ b/drivers/staging/rdma/hfi1/chip_registers.h @@ -1281,6 +1281,9 @@ #define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SHIFT 0 #define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SMASK 0xFFFFull #define PCIE_CFG_REG_PL2 (PCIE + 0x000000000708) +#define PCIE_CFG_REG_PL3 (PCIE + 0x00000000070C) +#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT 27 +#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK 0x38000000 #define PCIE_CFG_REG_PL102 (PCIE + 0x000000000898) #define PCIE_CFG_REG_PL102_GEN3_EQ_POST_CURSOR_PSET_SHIFT 12 #define PCIE_CFG_REG_PL102_GEN3_EQ_CURSOR_PSET_SHIFT 6 diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 5c694fa..084581a 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -60,6 +60,7 @@ #include "user_sdma.h" #include "user_exp_rcv.h" #include "eprom.h" +#include "aspm.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -798,7 +799,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); hfi1_stats.sps_ctxts--; - dd->freectxts++; + if (++dd->freectxts == dd->num_user_contexts) + aspm_enable_all(dd); mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: @@ -1040,7 +1042,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, INIT_LIST_HEAD(&uctxt->sdma_queues); spin_lock_init(&uctxt->sdma_qlock); hfi1_stats.sps_ctxts++; - dd->freectxts--; + /* + * Disable ASPM when there are open user/PSM contexts to avoid + * issues with ASPM L1 exit latency + */ + if (dd->freectxts-- == dd->num_user_contexts) + aspm_disable_all(dd); fd->uctxt = uctxt; return 0; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index d19d6b7..cb2f90a 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -314,6 +314,21 @@ struct hfi1_ctxtdata { struct list_head sdma_queues; spinlock_t sdma_qlock; + /* Is ASPM interrupt supported for this context */ + bool aspm_intr_supported; + /* ASPM state (enabled/disabled) for this context */ + bool aspm_enabled; + /* Timer for re-enabling ASPM if interrupt activity quietens down */ + struct timer_list aspm_timer; + /* Lock to serialize between intr, timer intr and user threads */ + spinlock_t aspm_lock; + /* Is ASPM processing enabled for this context (in intr context) */ + bool aspm_intr_enable; + /* Last interrupt timestamp */ + ktime_t aspm_ts_last_intr; + /* Last timestamp at 
which we scheduled a timer for this context */ + ktime_t aspm_ts_timer_sched; + /* * The interrupt handler for a particular receive context can vary * throughout it's lifetime. This is not a lock protected data member so @@ -893,6 +908,8 @@ struct hfi1_devdata { * number of ctxts available for PSM open */ u32 freectxts; + /* total number of available user/PSM contexts */ + u32 num_user_contexts; /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; @@ -1121,6 +1138,13 @@ struct hfi1_devdata { /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; dma_addr_t rcvhdrtail_dummy_physaddr; + + bool aspm_supported; /* Does HW support ASPM */ + bool aspm_enabled; /* ASPM state: enabled/disabled */ + /* Serialize ASPM enable/disable between multiple verbs contexts */ + spinlock_t aspm_lock; + /* Number of verbs contexts which have disabled ASPM */ + atomic_t aspm_disabled_cnt; }; /* 8051 firmware version helper */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index fc3d40a..ba52ee3 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -66,6 +66,7 @@ #include "sdma.h" #include "debugfs.h" #include "verbs.h" +#include "aspm.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -190,6 +191,12 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) } } + /* + * Initialize aspm, to be done after gen3 transition and setting up + * contexts and before enabling interrupts + */ + aspm_init(dd); + return 0; nomem: ret = -ENOMEM; diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 3cdc804..76cf807 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -57,6 +57,7 @@ #include "hfi.h" #include "chip_registers.h" +#include "aspm.h" /* link speed vector for Gen3 speed - not in Linux headers */ #define GEN1_SPEED_VECTOR 0x1 @@ -463,6 +464,10 @@ static int hfi1_pcie_caps; module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); +uint aspm_mode = ASPM_MODE_DISABLED; +module_param_named(aspm, aspm_mode, uint, S_IRUGO); +MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); + static void tune_pcie_caps(struct hfi1_devdata *dd) { struct pci_dev *parent; @@ -957,7 +962,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) int do_retry, retry_count = 0; uint default_pset; u16 target_vector, target_speed; - u16 lnkctl, lnkctl2, vendor; + u16 lnkctl2, vendor; u8 nsbr = 1; u8 div; const u8 (*eq)[3]; @@ -1147,11 +1152,12 @@ retry: */ write_xmt_margin(dd, __func__); - /* step 5e: disable active state power management (ASPM) */ + /* + * step 5e: disable active state power management (ASPM). It + * will be enabled if required later + */ dd_dev_info(dd, "%s: clearing ASPM\n", __func__); - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &lnkctl); - lnkctl &= ~PCI_EXP_LNKCTL_ASPMC; - pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, lnkctl); + aspm_hw_disable_l1(dd); /* * step 5f: clear DirectSpeedChange -- cgit v0.10.2 From 31e7af1c5bcd9617c68f1bbd78a85e896caf87e4 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 3 Feb 2016 14:33:14 -0800 Subject: staging/rdma/hfi1: Fix SL->SC checks SLs which are mapped to SC15 are invalid and should fail the operation. For RC/UC QP types, verify the AH information at modify_qp time and fail the modify_qp if the SL is invalid. 
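A condensed sketch of that modify_qp-time check follows. validate_ah_sc() is a made-up wrapper used purely for illustration; ah_to_sc() is the driver's existing SL-to-SC helper, and the real checks are the ones added to hfi1_check_modify_qp() in the diff below.

/*
 * Illustrative only: resolve the AH's SL to a service channel and
 * reject the attribute if it maps to SC15 (0xf), which the patch
 * treats as invalid for these QP types.
 */
static int validate_ah_sc(struct ib_qp *ibqp, struct ib_ah_attr *ah_attr)
{
	u8 sc = ah_to_sc(ibqp->device, ah_attr);

	return (sc == 0xf) ? -EINVAL : 0;
}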
For other QP types check the SL during post_send via the new rdmavt callback. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 52723c2..05a9619 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -185,6 +185,9 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_AV) { sc = ah_to_sc(ibqp->device, &attr->ah_attr); + if (sc == 0xf) + return -EINVAL; + if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) return -EINVAL; @@ -192,6 +195,9 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_ALT_PATH) { sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr); + if (sc == 0xf) + return -EINVAL; + if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) return -EINVAL; @@ -220,6 +226,20 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, } } +int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +{ + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct rvt_ah *ah = ibah_to_rvtah(ud_wr(wr)->ah); + + if (qp->ibqp.qp_type != IB_QPT_RC && + qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_SMI && + ibp->sl_to_sc[ah->attr.sl] == 0xf) { + return -EINVAL; + } + return 0; +} + /** * hfi1_compute_aeth - compute the AETH (syndrome + MSN) * @qp: the queue pair to compute the AETH for diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index a53d93a..6d541ca 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1561,6 +1561,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; + dd->verbs_dev.rdi.driver_f.check_send_wr = hfi1_check_send_wr; /* completeion queue */ snprintf(dd->verbs_dev.rdi.dparms.cq_name, diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 79bcab6..1571ae3 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -439,6 +439,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); +int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr); + int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); -- cgit v0.10.2 From d413c1a65292189eb729738c549732951a2e50ab Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:33:22 -0800 Subject: staging/rdma/hfi1: Remove unused code This comment and code was unused. Just remove it. 
Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index ba52ee3..1680808 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1526,13 +1526,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) goto bail; } - /* Event mask is per device now and is in hfi1_devdata */ - /*if (rcd->ctxt >= dd->first_user_ctxt) { - rcd->user_event_mask = vmalloc_user(PAGE_SIZE); - if (!rcd->user_event_mask) - goto bail_free_hdrq; - }*/ - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent( &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, -- cgit v0.10.2 From 2780739262e32b9c283b6b04f7899f9803993ebc Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:33:31 -0800 Subject: staging/rdma/hfi1: Remove unnecessary duplicated variable struct hfi1_devdata contained 2 variables which represented the numa node the device is attached to. Remove the duplicated one. Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index cb2f90a..897046c 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1128,7 +1128,6 @@ struct hfi1_devdata { struct timer_list rcverr_timer; u32 rcv_ovfl_cnt; - int assigned_node_id; wait_queue_head_t event_queue; /* Save the enabled LCB error bits */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 1680808..17b876d 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -130,15 +130,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) { unsigned i; int ret; - int local_node_id = pcibus_to_node(dd->pcidev->bus); /* Control context has to be always 0 */ BUILD_BUG_ON(HFI1_CTRL_CTXT != 0); - if (local_node_id < 0) - local_node_id = numa_node_id(); - dd->assigned_node_id = local_node_id; - dd->rcd = kcalloc(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL); if (!dd->rcd) goto nomem; diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 36be547..973c14b 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -157,7 +157,7 @@ static inline void _hfi1_schedule_send(struct rvt_qp *qp) iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, priv->s_sde ? priv->s_sde->cpu : - cpumask_first(cpumask_of_node(dd->assigned_node_id))); + cpumask_first(cpumask_of_node(dd->node))); } /** diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 6d541ca..d617324 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1567,7 +1567,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) snprintf(dd->verbs_dev.rdi.dparms.cq_name, sizeof(dd->verbs_dev.rdi.dparms.cq_name), "hfi1_cq%d", dd->unit); - dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; + dd->verbs_dev.rdi.dparms.node = dd->node; /* misc settings */ dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */ -- cgit v0.10.2 From 957558c9668f06b04530b7ddbfd2dbea86630496 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:33:40 -0800 Subject: staging/rdma/hfi1: Consolidate CPU/IRQ affinity support This patch unifies the affinity support for CPU and IRQ allocations into a single code base. 
The goal is to allow the driver to make intelligent placement decision based on an overall view of processes and IRQs across as much of the driver as possible. Pulling all the scattered affinity code into a single code base lays the ground work for accomplishing the above goal. For example, previous implementations made user process placement decision solely based on other user processes. This algorithm is limited as it did not take into account IRQ placement and could result in overloading certain CPUs. A single code base also provides a much easier way to maintain and debug any performance issues related to affinity. Reviewed-by: Mike Marciniszyn Reviewed-by: Dean Luick Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 9b3f7e9..6681b74 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -7,7 +7,8 @@ # obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o -hfi1-y := chip.o device.o diag.o driver.o efivar.o eprom.o file_ops.o firmware.o \ +hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \ + eprom.o file_ops.o firmware.o \ init.o intr.o mad.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/staging/rdma/hfi1/affinity.c new file mode 100644 index 0000000..59b2972 --- /dev/null +++ b/drivers/staging/rdma/hfi1/affinity.c @@ -0,0 +1,433 @@ +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include +#include +#include + +#include "hfi.h" +#include "affinity.h" +#include "sdma.h" +#include "trace.h" + +struct cpu_mask_set { + struct cpumask mask; + struct cpumask used; + uint gen; +}; + +struct hfi1_affinity { + struct cpu_mask_set def_intr; + struct cpu_mask_set rcv_intr; + struct cpu_mask_set proc; + /* spin lock to protect affinity struct */ + spinlock_t lock; +}; + +/* Name of IRQ types, indexed by enum irq_type */ +static const char * const irq_type_names[] = { + "SDMA", + "RCVCTXT", + "GENERAL", + "OTHER", +}; + +static inline void init_cpu_mask_set(struct cpu_mask_set *set) +{ + cpumask_clear(&set->mask); + cpumask_clear(&set->used); + set->gen = 0; +} + +/* + * Interrupt affinity. + * + * non-rcv avail gets a default mask that + * starts as possible cpus with threads reset + * and each rcv avail reset. + * + * rcv avail gets node relative 1 wrapping back + * to the node relative 1 as necessary. + * + */ +int hfi1_dev_affinity_init(struct hfi1_devdata *dd) +{ + int node = pcibus_to_node(dd->pcidev->bus); + struct hfi1_affinity *info; + const struct cpumask *local_mask; + int curr_cpu, possible, i, ht; + + if (node < 0) + node = numa_node_id(); + dd->node = node; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + spin_lock_init(&info->lock); + + init_cpu_mask_set(&info->def_intr); + init_cpu_mask_set(&info->rcv_intr); + init_cpu_mask_set(&info->proc); + + local_mask = cpumask_of_node(dd->node); + if (cpumask_first(local_mask) >= nr_cpu_ids) + local_mask = topology_core_cpumask(0); + /* use local mask as default */ + cpumask_copy(&info->def_intr.mask, local_mask); + /* + * Remove HT cores from the default mask. Do this in two steps below. + */ + possible = cpumask_weight(&info->def_intr.mask); + ht = cpumask_weight(topology_sibling_cpumask( + cpumask_first(&info->def_intr.mask))); + /* + * Step 1. Skip over the first N HT siblings and use them as the + * "real" cores. Assumes that HT cores are not enumerated in + * succession (except in the single core case). + */ + curr_cpu = cpumask_first(&info->def_intr.mask); + for (i = 0; i < possible / ht; i++) + curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); + /* + * Step 2. Remove the remaining HT siblings. Use cpumask_next() to + * skip any gaps. + */ + for (; i < possible; i++) { + cpumask_clear_cpu(curr_cpu, &info->def_intr.mask); + curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); + } + + /* fill in the receive list */ + possible = cpumask_weight(&info->def_intr.mask); + curr_cpu = cpumask_first(&info->def_intr.mask); + if (possible == 1) { + /* only one CPU, everyone will use it */ + cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); + } else { + /* + * Retain the first CPU in the default list for the control + * context. + */ + curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); + /* + * Remove the remaining kernel receive queues from + * the default list and add them to the receive list. 
+ */ + for (i = 0; i < dd->n_krcv_queues - 1; i++) { + cpumask_clear_cpu(curr_cpu, &info->def_intr.mask); + cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); + curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); + if (curr_cpu >= nr_cpu_ids) + break; + } + } + + cpumask_copy(&info->proc.mask, cpu_online_mask); + dd->affinity = info; + return 0; +} + +void hfi1_dev_affinity_free(struct hfi1_devdata *dd) +{ + kfree(dd->affinity); +} + +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +{ + int ret; + cpumask_var_t diff; + struct cpu_mask_set *set; + struct sdma_engine *sde = NULL; + struct hfi1_ctxtdata *rcd = NULL; + char extra[64]; + int cpu = -1; + + extra[0] = '\0'; + cpumask_clear(&msix->mask); + + ret = zalloc_cpumask_var(&diff, GFP_KERNEL); + if (!ret) + return -ENOMEM; + + switch (msix->type) { + case IRQ_SDMA: + sde = (struct sdma_engine *)msix->arg; + scnprintf(extra, 64, "engine %u", sde->this_idx); + /* fall through */ + case IRQ_GENERAL: + set = &dd->affinity->def_intr; + break; + case IRQ_RCVCTXT: + rcd = (struct hfi1_ctxtdata *)msix->arg; + if (rcd->ctxt == HFI1_CTRL_CTXT) { + set = &dd->affinity->def_intr; + cpu = cpumask_first(&set->mask); + } else { + set = &dd->affinity->rcv_intr; + } + scnprintf(extra, 64, "ctxt %u", rcd->ctxt); + break; + default: + dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type); + return -EINVAL; + } + + /* + * The control receive context is placed on a particular CPU, which + * is set above. Skip accounting for it. Everything else finds its + * CPU here. + */ + if (cpu == -1) { + spin_lock(&dd->affinity->lock); + if (cpumask_equal(&set->mask, &set->used)) { + /* + * We've used up all the CPUs, bump up the generation + * and reset the 'used' map + */ + set->gen++; + cpumask_clear(&set->used); + } + cpumask_andnot(diff, &set->mask, &set->used); + cpu = cpumask_first(diff); + cpumask_set_cpu(cpu, &set->used); + spin_unlock(&dd->affinity->lock); + } + + switch (msix->type) { + case IRQ_SDMA: + sde->cpu = cpu; + break; + case IRQ_GENERAL: + case IRQ_RCVCTXT: + case IRQ_OTHER: + break; + } + + cpumask_set_cpu(cpu, &msix->mask); + dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n", + msix->msix.vector, irq_type_names[msix->type], + extra, cpu); + irq_set_affinity_hint(msix->msix.vector, &msix->mask); + + free_cpumask_var(diff); + return 0; +} + +void hfi1_put_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix) +{ + struct cpu_mask_set *set = NULL; + struct hfi1_ctxtdata *rcd; + + switch (msix->type) { + case IRQ_SDMA: + case IRQ_GENERAL: + set = &dd->affinity->def_intr; + break; + case IRQ_RCVCTXT: + rcd = (struct hfi1_ctxtdata *)msix->arg; + /* only do accounting for non control contexts */ + if (rcd->ctxt != HFI1_CTRL_CTXT) + set = &dd->affinity->rcv_intr; + break; + default: + return; + } + + if (set) { + spin_lock(&dd->affinity->lock); + cpumask_andnot(&set->used, &set->used, &msix->mask); + if (cpumask_empty(&set->used) && set->gen) { + set->gen--; + cpumask_copy(&set->used, &set->mask); + } + spin_unlock(&dd->affinity->lock); + } + + irq_set_affinity_hint(msix->msix.vector, NULL); + cpumask_clear(&msix->mask); +} + +int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) +{ + int cpu = -1, ret; + cpumask_var_t diff, mask, intrs; + const struct cpumask *node_mask, + *proc_mask = tsk_cpus_allowed(current); + struct cpu_mask_set *set = &dd->affinity->proc; + char buf[1024]; + + /* + * check whether process/context affinity has already + * been set + */ + if (cpumask_weight(proc_mask) == 
1) { + scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask)); + hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s", + current->pid, current->comm, buf); + /* + * Mark the pre-set CPU as used. This is atomic so we don't + * need the lock + */ + cpu = cpumask_first(proc_mask); + cpumask_set_cpu(cpu, &set->used); + goto done; + } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { + scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask)); + hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s", + current->pid, current->comm, buf); + goto done; + } + + /* + * The process does not have a preset CPU affinity so find one to + * recommend. We prefer CPUs on the same NUMA as the device. + */ + + ret = zalloc_cpumask_var(&diff, GFP_KERNEL); + if (!ret) + goto done; + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) + goto free_diff; + ret = zalloc_cpumask_var(&intrs, GFP_KERNEL); + if (!ret) + goto free_mask; + + spin_lock(&dd->affinity->lock); + /* + * If we've used all available CPUs, clear the mask and start + * overloading. + */ + if (cpumask_equal(&set->mask, &set->used)) { + set->gen++; + cpumask_clear(&set->used); + } + + /* CPUs used by interrupt handlers */ + cpumask_copy(intrs, (dd->affinity->def_intr.gen ? + &dd->affinity->def_intr.mask : + &dd->affinity->def_intr.used)); + cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ? + &dd->affinity->rcv_intr.mask : + &dd->affinity->rcv_intr.used)); + scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs)); + hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf); + + /* + * If we don't have a NUMA node requested, preference is towards + * device NUMA node + */ + if (node == -1) + node = dd->node; + node_mask = cpumask_of_node(node); + scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask)); + hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf); + + /* diff will hold all unused cpus */ + cpumask_andnot(diff, &set->mask, &set->used); + scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff)); + hfi1_cdbg(PROC, "unused CPUs (all) %s", buf); + + /* get cpumask of available CPUs on preferred NUMA */ + cpumask_and(mask, diff, node_mask); + scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask)); + hfi1_cdbg(PROC, "available cpus on NUMA %s", buf); + + /* + * At first, we don't want to place processes on the same + * CPUs as interrupt handlers. 
+ */ + cpumask_andnot(diff, mask, intrs); + if (!cpumask_empty(diff)) + cpumask_copy(mask, diff); + + /* + * if we don't have a cpu on the preferred NUMA, get + * the list of the remaining available CPUs + */ + if (cpumask_empty(mask)) { + cpumask_andnot(diff, &set->mask, &set->used); + cpumask_andnot(mask, diff, node_mask); + } + scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask)); + hfi1_cdbg(PROC, "possible CPUs for process %s", buf); + + cpu = cpumask_first(mask); + if (cpu >= nr_cpu_ids) /* empty */ + cpu = -1; + else + cpumask_set_cpu(cpu, &set->used); + spin_unlock(&dd->affinity->lock); + + free_cpumask_var(intrs); +free_mask: + free_cpumask_var(mask); +free_diff: + free_cpumask_var(diff); +done: + return cpu; +} + +void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu) +{ + struct cpu_mask_set *set = &dd->affinity->proc; + + if (cpu < 0) + return; + spin_lock(&dd->affinity->lock); + cpumask_clear_cpu(cpu, &set->used); + if (cpumask_empty(&set->used) && set->gen) { + set->gen--; + cpumask_copy(&set->used, &set->mask); + } + spin_unlock(&dd->affinity->lock); +} + diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/staging/rdma/hfi1/affinity.h new file mode 100644 index 0000000..2bdac96 --- /dev/null +++ b/drivers/staging/rdma/hfi1/affinity.h @@ -0,0 +1,94 @@ +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#ifndef _HFI1_AFFINITY_H +#define _HFI1_AFFINITY_H + +#include "hfi.h" + +enum irq_type { + IRQ_SDMA, + IRQ_RCVCTXT, + IRQ_GENERAL, + IRQ_OTHER +}; + +/* Can be used for both memory and cpu */ +enum affinity_flags { + AFF_AUTO, + AFF_NUMA_LOCAL, + AFF_DEV_LOCAL, + AFF_IRQ_LOCAL +}; + +struct hfi1_msix_entry; + +/* Initialize driver affinity data */ +int hfi1_dev_affinity_init(struct hfi1_devdata *); +/* Free driver affinity data */ +void hfi1_dev_affinity_free(struct hfi1_devdata *); +/* + * Set IRQ affinity to a CPU. The function will determine the + * CPU and set the affinity to it. + */ +int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +/* + * Remove the IRQ's CPU affinity. This function also updates + * any internal CPU tracking data + */ +void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +/* + * Determine a CPU affinity for a user process, if the process does not + * have an affinity set yet. + */ +int hfi1_get_proc_affinity(struct hfi1_devdata *, int); +/* Release a CPU used by a user process. */ +void hfi1_put_proc_affinity(struct hfi1_devdata *, int); + +#endif /* _HFI1_AFFINITY_H */ diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 3577042..6045c91 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -12349,9 +12349,8 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) for (i = 0; i < dd->num_msix_entries; i++, me++) { if (me->arg == NULL) /* => no irq, no affinity */ - break; - irq_set_affinity_hint(dd->msix_entries[i].msix.vector, - NULL); + continue; + hfi1_put_irq_affinity(dd, &dd->msix_entries[i]); free_irq(me->msix.vector, me->arg); } } else { @@ -12372,8 +12371,6 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) } /* clean structures */ - for (i = 0; i < dd->num_msix_entries; i++) - free_cpumask_var(dd->msix_entries[i].mask); kfree(dd->msix_entries); dd->msix_entries = NULL; dd->num_msix_entries = 0; @@ -12438,16 +12435,10 @@ static int request_intx_irq(struct hfi1_devdata *dd) static int request_msix_irqs(struct hfi1_devdata *dd) { - const struct cpumask *local_mask; - cpumask_var_t def, rcv; - bool def_ret, rcv_ret; int first_general, last_general; int first_sdma, last_sdma; int first_rx, last_rx; - int first_cpu, curr_cpu; - int rcv_cpu, sdma_cpu; - int i, ret = 0, possible; - int ht; + int i, ret = 0; /* calculate the ranges we are going to use */ first_general = 0; @@ -12456,52 +12447,6 @@ static int request_msix_irqs(struct hfi1_devdata *dd) last_rx = first_rx + dd->n_krcv_queues; /* - * Interrupt affinity. - * - * non-rcv avail gets a default mask that - * starts as possible cpus with threads reset - * and each rcv avail reset. - * - * rcv avail gets node relative 1 wrapping back - * to the node relative 1 as necessary. 
- * - */ - local_mask = cpumask_of_pcibus(dd->pcidev->bus); - /* if first cpu is invalid, use NUMA 0 */ - if (cpumask_first(local_mask) >= nr_cpu_ids) - local_mask = topology_core_cpumask(0); - - def_ret = zalloc_cpumask_var(&def, GFP_KERNEL); - rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL); - if (!def_ret || !rcv_ret) - goto bail; - /* use local mask as default */ - cpumask_copy(def, local_mask); - possible = cpumask_weight(def); - /* disarm threads from default */ - ht = cpumask_weight( - topology_sibling_cpumask(cpumask_first(local_mask))); - for (i = possible/ht; i < possible; i++) - cpumask_clear_cpu(i, def); - /* def now has full cores on chosen node*/ - first_cpu = cpumask_first(def); - if (nr_cpu_ids >= first_cpu) - first_cpu++; - curr_cpu = first_cpu; - - /* One context is reserved as control context */ - for (i = first_cpu; i < dd->n_krcv_queues + first_cpu - 1; i++) { - cpumask_clear_cpu(curr_cpu, def); - cpumask_set_cpu(curr_cpu, rcv); - curr_cpu = cpumask_next(curr_cpu, def); - if (curr_cpu >= nr_cpu_ids) - break; - } - /* def mask has non-rcv, rcv has recv mask */ - rcv_cpu = cpumask_first(rcv); - sdma_cpu = cpumask_first(def); - - /* * Sanity check - the code expects all SDMA chip source * interrupts to be in the same CSR, starting at bit 0. Verify * that this is true by checking the bit location of the start. @@ -12526,6 +12471,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) snprintf(me->name, sizeof(me->name), DRIVER_NAME "_%d", dd->unit); err_info = "general"; + me->type = IRQ_GENERAL; } else if (first_sdma <= i && i < last_sdma) { idx = i - first_sdma; sde = &dd->per_sdma[idx]; @@ -12535,6 +12481,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) DRIVER_NAME "_%d sdma%d", dd->unit, idx); err_info = "sdma"; remap_sdma_interrupts(dd, idx, i); + me->type = IRQ_SDMA; } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; rcd = dd->rcd[idx]; @@ -12555,6 +12502,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) DRIVER_NAME "_%d kctxt%d", dd->unit, idx); err_info = "receive context"; remap_intr(dd, IS_RCVAVAIL_START + idx, i); + me->type = IRQ_RCVCTXT; } else { /* not in our expected range - complain, then ignore it */ @@ -12582,52 +12530,13 @@ static int request_msix_irqs(struct hfi1_devdata *dd) */ me->arg = arg; - if (!zalloc_cpumask_var( - &dd->msix_entries[i].mask, - GFP_KERNEL)) - goto bail; - if (handler == sdma_interrupt) { - dd_dev_info(dd, "sdma engine %d cpu %d\n", - sde->this_idx, sdma_cpu); - sde->cpu = sdma_cpu; - cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask); - sdma_cpu = cpumask_next(sdma_cpu, def); - if (sdma_cpu >= nr_cpu_ids) - sdma_cpu = cpumask_first(def); - } else if (handler == receive_context_interrupt) { - dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt, - (rcd->ctxt == HFI1_CTRL_CTXT) ? 
- cpumask_first(def) : rcv_cpu); - if (rcd->ctxt == HFI1_CTRL_CTXT) { - /* map to first default */ - cpumask_set_cpu(cpumask_first(def), - dd->msix_entries[i].mask); - } else { - cpumask_set_cpu(rcv_cpu, - dd->msix_entries[i].mask); - rcv_cpu = cpumask_next(rcv_cpu, rcv); - if (rcv_cpu >= nr_cpu_ids) - rcv_cpu = cpumask_first(rcv); - } - } else { - /* otherwise first def */ - dd_dev_info(dd, "%s cpu %d\n", - err_info, cpumask_first(def)); - cpumask_set_cpu( - cpumask_first(def), dd->msix_entries[i].mask); - } - irq_set_affinity_hint( - dd->msix_entries[i].msix.vector, - dd->msix_entries[i].mask); + ret = hfi1_get_irq_affinity(dd, me); + if (ret) + dd_dev_err(dd, + "unable to pin IRQ %d\n", ret); } -out: - free_cpumask_var(def); - free_cpumask_var(rcv); return ret; -bail: - ret = -ENOMEM; - goto out; } /* @@ -14238,6 +14147,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* set up KDETH QP prefix in both RX and TX CSRs */ init_kdeth_qp(dd); + ret = hfi1_dev_affinity_init(dd); + if (ret) + goto bail_cleanup; + /* send contexts must be set up before receive contexts */ ret = init_send_contexts(dd); if (ret) diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 084581a..c9172a0 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -749,6 +749,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* drain user sdma queue */ hfi1_user_sdma_free_queues(fdata); + /* release the cpu */ + hfi1_put_proc_affinity(dd, fdata->rec_cpu_num); + /* * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. @@ -842,8 +845,16 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) mutex_lock(&hfi1_mutex); /* First, lets check if we need to setup a shared context? */ - if (uinfo->subctxt_cnt) + if (uinfo->subctxt_cnt) { + struct hfi1_filedata *fd = fp->private_data; + ret = find_shared_ctxt(fp, uinfo); + if (ret < 0) + goto done_unlock; + if (ret) + fd->rec_cpu_num = hfi1_get_proc_affinity( + fd->uctxt->dd, fd->uctxt->numa_id); + } /* * We execute the following block if we couldn't find a @@ -853,6 +864,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; ret = get_user_context(fp, uinfo, i_minor - 1, alg); } +done_unlock: mutex_unlock(&hfi1_mutex); done: return ret; @@ -978,7 +990,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt; unsigned ctxt; - int ret; + int ret, numa; if (dd->flags & HFI1_FROZEN) { /* @@ -998,12 +1010,21 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, if (ctxt == dd->num_rcv_contexts) return -EBUSY; - uctxt = hfi1_create_ctxtdata(dd->pport, ctxt); + fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1); + if (fd->rec_cpu_num != -1) + numa = cpu_to_node(fd->rec_cpu_num); + else + numa = numa_node_id(); + uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa); if (!uctxt) { dd_dev_err(dd, "Unable to allocate ctxtdata memory, failing open\n"); return -ENOMEM; } + hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)", + uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num, + uctxt->numa_id); + /* * Allocate and enable a PIO send context. 
*/ diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 897046c..571e7b1 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -75,6 +75,7 @@ #include "mad.h" #include "qsfp.h" #include "platform.h" +#include "affinity.h" /* bumped 1 from s/w major version of TrueScale */ #define HFI1_CHIP_VERS_MAJ 3U @@ -529,10 +530,11 @@ static inline void incr_cntr32(u32 *cntr) #define MAX_NAME_SIZE 64 struct hfi1_msix_entry { + enum irq_type type; struct msix_entry msix; void *arg; char name[MAX_NAME_SIZE]; - cpumask_var_t mask; + cpumask_t mask; }; /* per-SL CCA information */ @@ -1144,6 +1146,8 @@ struct hfi1_devdata { spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ atomic_t aspm_disabled_cnt; + + struct hfi1_affinity *affinity; }; /* 8051 firmware version helper */ @@ -1197,7 +1201,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd); int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *); int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *); int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32); +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int); void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *, struct hfi1_devdata *, u8, u8); void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *); diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 17b876d..98b3fc1 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -144,7 +144,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd; ppd = dd->pport + (i % dd->num_pports); - rcd = hfi1_create_ctxtdata(ppd, i); + rcd = hfi1_create_ctxtdata(ppd, i, dd->node); if (!rcd) { dd_dev_err(dd, "Unable to allocate kernel receive context, failing\n"); @@ -204,7 +204,8 @@ bail: /* * Common code for user and kernel context setup. */ -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt) +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, + int numa) { struct hfi1_devdata *dd = ppd->dd; struct hfi1_ctxtdata *rcd; @@ -227,7 +228,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt) rcd->cnt = 1; rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; - rcd->numa_id = numa_node_id(); + rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; mutex_init(&rcd->exp_lock); @@ -982,6 +983,7 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); + hfi1_dev_affinity_free(dd); ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); } @@ -1010,9 +1012,6 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) dd->pport = (struct hfi1_pportdata *)(dd + 1); INIT_LIST_HEAD(&dd->list); - dd->node = dev_to_node(&pdev->dev); - if (dd->node < 0) - dd->node = 0; idr_preload(GFP_KERNEL); spin_lock_irqsave(&hfi1_devs_lock, flags); -- cgit v0.10.2 From cc57236f5515cd343fa47e9664090b54bdb410a3 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:33:49 -0800 Subject: staging/rdma/hfi1: Allocate send ctxt on device NUMA node Allocate the user mode send context memory on the numa node which the device is attached to for better performance. 
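The idea can be sketched with a hypothetical node-local allocator (buf_alloc below is illustrative only and not part of the patch; the change itself only swaps the node argument handed to sc_alloc):

/* sketch: allocate driver memory on the NUMA node the HFI device sits on */
static void *buf_alloc(struct hfi1_devdata *dd, size_t size)
{
	/* dd->node holds the device's home NUMA node */
	return kzalloc_node(size, GFP_KERNEL, dd->node);
}
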
Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index c9172a0..2de9032 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1029,7 +1029,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, * Allocate and enable a PIO send context. */ uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, - uctxt->numa_id); + uctxt->dd->node); if (!uctxt->sc) return -ENOMEM; -- cgit v0.10.2 From 377f111ee81bcef1a788a396f6d813a6b966acbb Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:33:58 -0800 Subject: staging/rdma/hfi1: Verbs Mem affinity support Change verbs memory allocations to the device numa node. This keeps memory close to the device for optimal performance. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 98b3fc1..629e92a 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -134,7 +134,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) /* Control context has to be always 0 */ BUILD_BUG_ON(HFI1_CTRL_CTXT != 0); - dd->rcd = kcalloc(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL); + dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd), + GFP_KERNEL, dd->node); if (!dd->rcd) goto nomem; diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 05a9619..b78c8ea 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -596,13 +596,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, { struct hfi1_qp_priv *priv; - priv = kzalloc(sizeof(*priv), gfp); + priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node); if (!priv) return ERR_PTR(-ENOMEM); priv->owner = qp; - priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp); + priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node); if (!priv->s_hdr) { kfree(priv); return ERR_PTR(-ENOMEM); -- cgit v0.10.2 From 89abfc8d64dd1ad32e6d96404eb0a1ea6cbb4ca4 Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Wed, 3 Feb 2016 14:34:07 -0800 Subject: staging/rdma/hfi1: Change send_schedule counter to a per cpu counter A patch to fix fairness issues in QP scheduling requires n_send_schedule counter to be converted to a per cpu counter to reduce cache misses. 
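A minimal sketch of the generic per-cpu counter pattern being adopted (standard kernel API usage, not the driver's exact helpers):

	u64 __percpu *cnt;
	u64 total = 0;
	int cpu;

	cnt = alloc_percpu(u64);		/* one u64 per possible CPU */
	if (!cnt)
		return -ENOMEM;

	this_cpu_inc(*cnt);			/* hot path: no shared cache line to bounce */

	for_each_possible_cpu(cpu)		/* slow path: fold the per-cpu values on read */
		total += *per_cpu_ptr(cnt, cpu);

	free_percpu(cnt);
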
Reviewed-by: Mike Marciniszyn Signed-off-by: Vennila Megavannan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 6045c91..13b92a3 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -1609,7 +1609,8 @@ static u64 access_sw_send_schedule(const struct cntr_entry *entry, { struct hfi1_devdata *dd = (struct hfi1_devdata *)context; - return dd->verbs_dev.n_send_schedule; + return read_write_cpu(dd, &dd->z_send_schedule, dd->send_schedule, vl, + mode, data); } /* Software counters for the error status bits within MISC_ERR_STATUS */ diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 571e7b1..112f790 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -898,10 +898,11 @@ struct hfi1_devdata { /* reset value */ u64 z_int_counter; u64 z_rcv_limit; + u64 z_send_schedule; /* percpu int_counter */ u64 __percpu *int_counter; u64 __percpu *rcv_limit; - + u64 __percpu *send_schedule; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; /* number of pio send contexts in use by the driver */ @@ -1884,6 +1885,7 @@ static inline void hfi1_reset_cpu_counters(struct hfi1_devdata *dd) dd->z_int_counter = get_all_cpu_total(dd->int_counter); dd->z_rcv_limit = get_all_cpu_total(dd->rcv_limit); + dd->z_send_schedule = get_all_cpu_total(dd->send_schedule); ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 629e92a..6ddf3c8 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -985,6 +985,7 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); hfi1_dev_affinity_free(dd); + free_percpu(dd->send_schedule); ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); } @@ -1063,6 +1064,14 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } + dd->send_schedule = alloc_percpu(u64); + if (!dd->send_schedule) { + ret = -ENOMEM; + hfi1_early_err(&pdev->dev, + "Could not allocate per-cpu int_counter\n"); + goto bail; + } + if (!hfi1_cpulist_count) { u32 count = num_online_cpus(); diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 6379df5..ae28b85 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -875,7 +875,7 @@ void hfi1_do_send(struct rvt_qp *qp) /* allow other tasks to run */ if (unlikely(time_after(jiffies, timeout))) { cond_resched(); - ps.ppd->dd->verbs_dev.n_send_schedule++; + this_cpu_inc(*ps.ppd->dd->send_schedule); timeout = jiffies + SEND_RESCHED_TIMEOUT; } } while (make_req(qp)); diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 1571ae3..ac84dd7 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -270,7 +270,6 @@ struct hfi1_ibdev { u64 n_piowait; u64 n_txwait; u64 n_kmem_wait; - u64 n_send_schedule; #ifdef CONFIG_DEBUG_FS /* per HFI debugfs */ -- cgit v0.10.2 From f1bf29634057f56507945589aa40c96c649073ee Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:34:15 -0800 Subject: staging/rdma/hfi1: Fix for generic I2C interface The original I2C interface was geared for QSFP accesses. Modify the interface to behave more like a generic I2C controller such that reads and writes can accept multi-byte offsets. 
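The multi-byte offset is carried in the existing composite address rather than a new interface; a sketch of how the value is unpacked, mirroring the debugfs and TWSI hunks below:

	/* debugfs byte offset layout: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
	u16 i2c_addr    = (*ppos >> 16) & 0xffff;	/* upper byte: offset size, lower byte: device */
	u16 offset      = *ppos & 0xffff;		/* up to two bytes of register offset */
	int offset_size = (i2c_addr >> 8) & 0xff;	/* stripped again inside the TWSI layer */
	int dev         = i2c_addr & 0xff;		/* 7-bit I2C device address */
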
Removed reads following writes and moved reset to top level. Reviewed-by: Easwar Hariharan Reviewed-by: Dean Luick Signed-off-by: Pablo Cacho Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index acd2269..d6dc339 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -463,7 +463,8 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, goto _free; } - i2c_addr = (*ppos >> 16) & 0xff; + /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ + i2c_addr = (*ppos >> 16) & 0xffff; offset = *ppos & 0xffff; total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count); @@ -517,7 +518,8 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, goto _return; } - i2c_addr = (*ppos >> 16) & 0xff; + /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ + i2c_addr = (*ppos >> 16) & 0xffff; offset = *ppos & 0xffff; total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count); diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index 0d2ec97..0e1a492 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -71,14 +71,6 @@ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int ret, cnt; u8 *buff = bp; - /* Make sure TWSI bus is in sane state. */ - ret = hfi1_twsi_reset(dd, target); - if (ret) { - hfi1_dev_porterr(dd, ppd->port, - "I2C interface Reset for write failed\n"); - return -EIO; - } - cnt = 0; while (cnt < len) { int wlen = len - cnt; @@ -106,11 +98,22 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, int ret; ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex); - if (!ret) { - ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len); - mutex_unlock(&dd->qsfp_i2c_mutex); + if (ret) + return ret; + + /* make sure the TWSI bus is in a sane state */ + ret = hfi1_twsi_reset(ppd->dd, target); + if (ret) { + hfi1_dev_porterr(ppd->dd, ppd->port, + "I2C write interface reset failed\n"); + ret = -EIO; + goto done; } + ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len); + +done: + mutex_unlock(&dd->qsfp_i2c_mutex); return ret; } @@ -125,16 +128,6 @@ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int stuck = 0; u8 *buff = bp; - /* Make sure TWSI bus is in sane state. 
*/ - ret = hfi1_twsi_reset(dd, target); - if (ret) { - hfi1_dev_porterr(dd, ppd->port, - "I2C interface Reset for read failed\n"); - ret = -EIO; - stuck = 1; - goto exit; - } - cnt = 0; while (cnt < len) { int rlen = len - cnt; @@ -178,11 +171,22 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, int ret; ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex); - if (!ret) { - ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len); - mutex_unlock(&dd->qsfp_i2c_mutex); + if (ret) + return ret; + + /* make sure the TWSI bus is in a sane state */ + ret = hfi1_twsi_reset(ppd->dd, target); + if (ret) { + hfi1_dev_porterr(ppd->dd, ppd->port, + "I2C read interface reset failed\n"); + ret = -EIO; + goto done; } + ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len); + +done: + mutex_unlock(&dd->qsfp_i2c_mutex); return ret; } @@ -203,6 +207,15 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, if (ret) return ret; + /* make sure the TWSI bus is in a sane state */ + ret = hfi1_twsi_reset(ppd->dd, target); + if (ret) { + hfi1_dev_porterr(ppd->dd, ppd->port, + "QSFP write interface reset failed\n"); + mutex_unlock(&ppd->dd->qsfp_i2c_mutex); + return -EIO; + } + while (count < len) { /* * Set the qsfp page based on a zero-based addresss @@ -210,8 +223,8 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, */ page = (u8)(addr / QSFP_PAGESIZE); - ret = __i2c_write(ppd, target, QSFP_DEV, - QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); + ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, + QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); if (ret != 1) { hfi1_dev_porterr( ppd->dd, @@ -227,8 +240,8 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, if (((addr % QSFP_RW_BOUNDARY) + nwrite) > QSFP_RW_BOUNDARY) nwrite = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY); - ret = __i2c_write(ppd, target, QSFP_DEV, offset, bp + count, - nwrite); + ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, + offset, bp + count, nwrite); if (ret <= 0) /* stop on error or nothing written */ break; @@ -260,14 +273,23 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, if (ret) return ret; + /* make sure the TWSI bus is in a sane state */ + ret = hfi1_twsi_reset(ppd->dd, target); + if (ret) { + hfi1_dev_porterr(ppd->dd, ppd->port, + "QSFP read interface reset failed\n"); + mutex_unlock(&ppd->dd->qsfp_i2c_mutex); + return -EIO; + } + while (count < len) { /* * Set the qsfp page based on a zero-based address * and a page size of QSFP_PAGESIZE bytes. 
*/ page = (u8)(addr / QSFP_PAGESIZE); - ret = __i2c_write(ppd, target, QSFP_DEV, - QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); + ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, + QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); if (ret != 1) { hfi1_dev_porterr( ppd->dd, @@ -283,8 +305,10 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY) nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY); - ret = __i2c_read(ppd, target, QSFP_DEV, offset, bp + count, - nread); + /* QSFPs require a 5-10msec delay after write operations */ + mdelay(5); + ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, + offset, bp + count, nread); if (ret <= 0) /* stop on error or nothing read */ break; diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h index b1b9e4a..af59a43 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/staging/rdma/hfi1/qsfp.h @@ -70,6 +70,10 @@ /* Reads/writes cannot cross 128 byte boundaries */ #define QSFP_RW_BOUNDARY 128 +/* number of bytes in i2c offset for QSFP devices */ +#define __QSFP_OFFSET_SIZE 1 /* num address bytes */ +#define QSFP_OFFSET_SIZE (__QSFP_OFFSET_SIZE << 8) /* shifted value */ + /* Defined fields that Intel requires of qualified cables */ /* Byte 0 is Identifier, not checked */ /* Byte 1 is reserved "status MSB" */ diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c index ea54fd2..7c579b3 100644 --- a/drivers/staging/rdma/hfi1/twsi.c +++ b/drivers/staging/rdma/hfi1/twsi.c @@ -365,17 +365,25 @@ static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags) * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part, * which responded to all TWSI device codes, interpreting them as * address within device. On all other devices found on board handled by - * this driver, the device is followed by a one-byte "address" which selects + * this driver, the device is followed by a N-byte "address" which selects * the "register" or "offset" within the device from which data should * be read. */ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, void *buffer, int len) { - int ret; u8 *bp = buffer; + int ret = 1; + int i; + int offset_size; + + /* obtain the offset size, strip it from the device address */ + offset_size = (dev >> 8) & 0xff; + dev &= 0xff; - ret = 1; + /* allow at most a 2 byte offset */ + if (offset_size > 2) + goto bail; if (dev == HFI1_TWSI_NO_DEV) { /* legacy not-really-I2C */ @@ -383,34 +391,29 @@ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, ret = twsi_wr(dd, target, addr, HFI1_TWSI_START); } else { /* Actual I2C */ - ret = twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START); - if (ret) { - stop_cmd(dd, target); - ret = 1; - goto bail; - } - /* - * SFF spec claims we do _not_ stop after the addr - * but simply issue a start with the "read" dev-addr. 
- * Since we are implicitly waiting for ACK here, - * we need t_buf (nominally 20uSec) before that start, - * and cannot rely on the delay built in to the STOP - */ - ret = twsi_wr(dd, target, addr, 0); - udelay(TWSI_BUF_WAIT_USEC); + if (offset_size) { + ret = twsi_wr(dd, target, + dev | WRITE_CMD, HFI1_TWSI_START); + if (ret) { + stop_cmd(dd, target); + goto bail; + } - if (ret) { - dd_dev_err(dd, - "Failed to write interface read addr %02X\n", - addr); - ret = 1; - goto bail; + for (i = 0; i < offset_size; i++) { + ret = twsi_wr(dd, target, + (addr >> (i * 8)) & 0xff, 0); + udelay(TWSI_BUF_WAIT_USEC); + if (ret) { + dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n", + i, addr); + goto bail; + } + } } ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START); } if (ret) { stop_cmd(dd, target); - ret = 1; goto bail; } @@ -442,76 +445,55 @@ bail: * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part, * which responded to all TWSI device codes, interpreting them as * address within device. On all other devices found on board handled by - * this driver, the device is followed by a one-byte "address" which selects + * this driver, the device is followed by a N-byte "address" which selects * the "register" or "offset" within the device to which data should * be written. */ int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr, const void *buffer, int len) { - int sub_len; const u8 *bp = buffer; - int max_wait_time, i; int ret = 1; + int i; + int offset_size; - while (len > 0) { - if (dev == HFI1_TWSI_NO_DEV) { - if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD, - HFI1_TWSI_START)) { - goto failed_write; - } - } else { - /* Real I2C */ - if (twsi_wr(dd, target, - dev | WRITE_CMD, HFI1_TWSI_START)) - goto failed_write; - ret = twsi_wr(dd, target, addr, 0); - if (ret) { - dd_dev_err(dd, - "Failed to write interface write addr %02X\n", - addr); - goto failed_write; - } - } + /* obtain the offset size, strip it from the device address */ + offset_size = (dev >> 8) & 0xff; + dev &= 0xff; - sub_len = min(len, 4); - addr += sub_len; - len -= sub_len; - - for (i = 0; i < sub_len; i++) - if (twsi_wr(dd, target, *bp++, 0)) - goto failed_write; + /* allow at most a 2 byte offset */ + if (offset_size > 2) + goto bail; - stop_cmd(dd, target); + if (dev == HFI1_TWSI_NO_DEV) { + if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD, + HFI1_TWSI_START)) { + goto failed_write; + } + } else { + /* Real I2C */ + if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START)) + goto failed_write; + } - /* - * Wait for write complete by waiting for a successful - * read (the chip replies with a zero after the write - * cmd completes, and before it writes to the eeprom. - * The startcmd for the read will fail the ack until - * the writes have completed. We do this inline to avoid - * the debug prints that are in the real read routine - * if the startcmd fails. - * We also use the proper device address, so it doesn't matter - * whether we have real eeprom_dev. Legacy likes any address. 
- */ - max_wait_time = 100; - while (twsi_wr(dd, target, - dev | READ_CMD, HFI1_TWSI_START)) { - stop_cmd(dd, target); - if (!--max_wait_time) - goto failed_write; + for (i = 0; i < offset_size; i++) { + ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0); + udelay(TWSI_BUF_WAIT_USEC); + if (ret) { + dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n", + i, addr); + goto bail; } - /* now read (and ignore) the resulting byte */ - rd_byte(dd, target, 1); } + for (i = 0; i < len; i++) + if (twsi_wr(dd, target, *bp++, 0)) + goto failed_write; + ret = 0; - goto bail; failed_write: stop_cmd(dd, target); - ret = 1; bail: return ret; -- cgit v0.10.2 From 23cd4716b7e2792f1fdc31b83feb9e9a9812c25f Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Wed, 3 Feb 2016 14:34:23 -0800 Subject: staging/rdma/hfi1: Allow a fair scheduling of QPs This patch fixes the fairness issues in QP scheduling - the timeout for cond_resched is changed to a ratio of qp->timeout_jiffies - workqueue_congested is used to determine if qp needs to reschedule itself Reviewed-by: Mike Marciniszyn Signed-off-by: Vennila Megavannan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index ae28b85..f09badb 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -822,29 +822,42 @@ void _hfi1_do_send(struct work_struct *work) void hfi1_do_send(struct rvt_qp *qp) { struct hfi1_pkt_state ps; + struct hfi1_qp_priv *priv = qp->priv; int (*make_req)(struct rvt_qp *qp); unsigned long flags; unsigned long timeout; + unsigned long timeout_int; + int cpu; ps.dev = to_idev(qp->ibqp.device); ps.ibp = to_iport(qp->ibqp.device, qp->port_num); ps.ppd = ppd_from_ibp(ps.ibp); - if ((qp->ibqp.qp_type == IB_QPT_RC || - qp->ibqp.qp_type == IB_QPT_UC) && - !loopback && - (qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) == - ps.ppd->lid) { - ruc_loopback(qp); - return; - } - - if (qp->ibqp.qp_type == IB_QPT_RC) + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc + ) - 1)) == + ps.ppd->lid)) { + ruc_loopback(qp); + return; + } make_req = hfi1_make_rc_req; - else if (qp->ibqp.qp_type == IB_QPT_UC) + timeout_int = (qp->timeout_jiffies); + break; + case IB_QPT_UC: + if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc + ) - 1)) == + ps.ppd->lid)) { + ruc_loopback(qp); + return; + } make_req = hfi1_make_uc_req; - else + timeout_int = SEND_RESCHED_TIMEOUT; + break; + default: make_req = hfi1_make_ud_req; + timeout_int = SEND_RESCHED_TIMEOUT; + } spin_lock_irqsave(&qp->s_lock, flags); @@ -858,7 +871,9 @@ void hfi1_do_send(struct rvt_qp *qp) spin_unlock_irqrestore(&qp->s_lock, flags); - timeout = jiffies + SEND_RESCHED_TIMEOUT; + timeout = jiffies + (timeout_int) / 8; + cpu = priv->s_sde ? priv->s_sde->cpu : + cpumask_first(cpumask_of_node(ps.ppd->dd->node)); do { /* Check for a constructed packet to be sent. 
*/ if (qp->s_hdrwords != 0) { @@ -874,9 +889,18 @@ void hfi1_do_send(struct rvt_qp *qp) /* allow other tasks to run */ if (unlikely(time_after(jiffies, timeout))) { + if (workqueue_congested(cpu, ps.ppd->hfi1_wq)) { + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags &= ~RVT_S_BUSY; + hfi1_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, + flags); + this_cpu_inc(*ps.ppd->dd->send_schedule); + return; + } cond_resched(); this_cpu_inc(*ps.ppd->dd->send_schedule); - timeout = jiffies + SEND_RESCHED_TIMEOUT; + timeout = jiffies + (timeout_int) / 8; } } while (make_req(qp)); } -- cgit v0.10.2 From e002dcc0abd318b0c5d7b2d05ba41ef4d00abe73 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 3 Feb 2016 14:34:32 -0800 Subject: staging/rdma/hfi1: Fix for module parameter rcvhdrcnt when it's 2097152 The driver crashes when loaded with parameter rcvhdrcnt=2097152. The root cause was that rcvhdrcnt was initially a 32 bit variable and its value was assigned to a 16 bit variable, truncating the upper 16 bits. This patch prevents the user from passing a value for rcvhdrcnt greater than 16352 (Maximum number for rcvhdrcnt). Reviewed-by: Dean Luick Reviewed-by: Mitko Haralanov Signed-off-by: Sebastian Sanchez Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 6ddf3c8..eec9130 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -77,6 +77,7 @@ #define HFI1_MIN_USER_CTXT_BUFCNT 7 #define HFI1_MIN_HDRQ_EGRBUF_CNT 2 +#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ @@ -1355,6 +1356,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ret = -EINVAL; goto bail; } + if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { + hfi1_early_err(&pdev->dev, + "Receive header queue count cannot be greater than %u\n", + HFI1_MAX_HDRQ_EGRBUF_CNT); + ret = -EINVAL; + goto bail; + } /* use the encoding function as a sanitization check */ if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n", -- cgit v0.10.2 From a92ba6d628d362811c8112280826de0e8b178e67 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:34:41 -0800 Subject: staging/rdma/hfi1: Improve performance of TID cache look up When TID caching was enabled, the way the driver found RB nodes when PSM was unprogramming TID entries was by traversing the RB tree, looking for a match on the RcvArray entry index. The performance of this algorithm was not only poor but also inconsistent depending on how many RB nodes would have to be traversed before a match was found. The lower performance was especially evident in cases where there was a cache miss with the cache full, requiring the unprogramming of several TID entries. This commit changes how RB nodes are looked up when being free'd by PSM to a index-based lookup into a flat array on the index of the RcvArray entry. This turns the entire look-up process into an O(1) algorithm. Special care needs to be taken for situations when TID caching is disabled. In those cases, there is no need to insert the RB nodes into an actual RB tree. Since the entire RcvArray management mechanism is managed by an index-based algorithm, the RB nodes can be saved into the flat array, making both "insertion" and "removal" faster. 
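With the flat array in place, the look-up reduces to a single dereference; a sketch (the helper name is illustrative, entry_to_rb[] is the array added below):

static struct mmu_rb_node *node_for_rcventry(struct hfi1_filedata *fd,
					     u32 rcventry)
{
	/* entry_to_rb[] is sized to uctxt->expected_count at context init */
	u32 idx = rcventry - fd->uctxt->expected_base;

	return fd->entry_to_rb[idx];	/* O(1), no tree walk */
}
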
Reviewed-by: Arthur Kepner Reviewed-by: Dennis Dalessandro Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 112f790..f3c1e67 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1171,6 +1171,7 @@ struct hfi1_filedata { int rec_cpu_num; struct mmu_notifier mn; struct rb_root tid_rb_root; + struct mmu_rb_node **entry_to_rb; spinlock_t tid_lock; /* protect tid_[limit,used] counters */ u32 tid_limit; u32 tid_used; @@ -1178,7 +1179,10 @@ struct hfi1_filedata { u32 *invalid_tids; u32 invalid_tid_idx; spinlock_t invalid_lock; /* protect the invalid_tids array */ - int (*mmu_rb_insert)(struct rb_root *, struct mmu_rb_node *); + int (*mmu_rb_insert)(struct hfi1_filedata *, struct rb_root *, + struct mmu_rb_node *); + void (*mmu_rb_remove)(struct hfi1_filedata *, struct rb_root *, + struct mmu_rb_node *); }; extern struct list_head hfi1_dev_list; diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 79612a2..36b61b5 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -102,12 +102,15 @@ static int set_rcvarray_entry(struct file *, unsigned long, u32, struct tid_group *, struct page **, unsigned); static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, unsigned long); -static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *, - unsigned long); -static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *, - u32); -static int mmu_rb_insert_by_addr(struct rb_root *, struct mmu_rb_node *); -static int mmu_rb_insert_by_entry(struct rb_root *, struct mmu_rb_node *); +static struct mmu_rb_node *mmu_rb_search(struct rb_root *, unsigned long); +static int mmu_rb_insert_by_addr(struct hfi1_filedata *, struct rb_root *, + struct mmu_rb_node *); +static int mmu_rb_insert_by_entry(struct hfi1_filedata *, struct rb_root *, + struct mmu_rb_node *); +static void mmu_rb_remove_by_addr(struct hfi1_filedata *, struct rb_root *, + struct mmu_rb_node *); +static void mmu_rb_remove_by_entry(struct hfi1_filedata *, struct rb_root *, + struct mmu_rb_node *); static void mmu_notifier_mem_invalidate(struct mmu_notifier *, unsigned long, unsigned long, enum mmu_call_types); @@ -219,6 +222,12 @@ int hfi1_user_exp_rcv_init(struct file *fp) } } + fd->entry_to_rb = kcalloc(uctxt->expected_count, + sizeof(struct rb_node *), + GFP_KERNEL); + if (!fd->entry_to_rb) + return -ENOMEM; + if (!HFI1_CAP_IS_USET(TID_UNMAP)) { fd->invalid_tid_idx = 0; fd->invalid_tids = kzalloc(uctxt->expected_count * @@ -226,27 +235,30 @@ int hfi1_user_exp_rcv_init(struct file *fp) if (!fd->invalid_tids) { ret = -ENOMEM; goto done; - } else { - /* - * Register MMU notifier callbacks. If the registration - * fails, continue but turn off the TID caching for - * all user contexts. 
- */ - ret = mmu_notifier_register(&fd->mn, current->mm); - if (ret) { - dd_dev_info(dd, - "Failed MMU notifier registration %d\n", - ret); - HFI1_CAP_USET(TID_UNMAP); - ret = 0; - } + } + + /* + * Register MMU notifier callbacks. If the registration + * fails, continue but turn off the TID caching for + * all user contexts. + */ + ret = mmu_notifier_register(&fd->mn, current->mm); + if (ret) { + dd_dev_info(dd, + "Failed MMU notifier registration %d\n", + ret); + HFI1_CAP_USET(TID_UNMAP); + ret = 0; } } - if (HFI1_CAP_IS_USET(TID_UNMAP)) + if (HFI1_CAP_IS_USET(TID_UNMAP)) { fd->mmu_rb_insert = mmu_rb_insert_by_entry; - else + fd->mmu_rb_remove = mmu_rb_remove_by_entry; + } else { fd->mmu_rb_insert = mmu_rb_insert_by_addr; + fd->mmu_rb_remove = mmu_rb_remove_by_addr; + } /* * PSM does not have a good way to separate, count, and @@ -318,6 +330,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) spin_unlock(&fd->rb_lock); hfi1_clear_tids(uctxt); } + + kfree(fd->entry_to_rb); return 0; } @@ -890,7 +904,7 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, memcpy(node->pages, pages, sizeof(struct page *) * npages); spin_lock(&fd->rb_lock); - ret = fd->mmu_rb_insert(root, node); + ret = fd->mmu_rb_insert(fd, root, node); spin_unlock(&fd->rb_lock); if (ret) { @@ -915,8 +929,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, struct hfi1_devdata *dd = uctxt->dd; struct mmu_rb_node *node; u8 tidctrl = EXP_TID_GET(tidinfo, CTRL); - u32 tidbase = uctxt->expected_base, - tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry; + u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry; if (tididx >= uctxt->expected_count) { dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n", @@ -927,15 +940,15 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, if (tidctrl == 0x3) return -EINVAL; - rcventry = tidbase + tididx + (tidctrl - 1); + rcventry = tididx + (tidctrl - 1); spin_lock(&fd->rb_lock); - node = mmu_rb_search_by_entry(&fd->tid_rb_root, rcventry); - if (!node) { + node = fd->entry_to_rb[rcventry]; + if (!node || node->rcventry != (uctxt->expected_base + rcventry)) { spin_unlock(&fd->rb_lock); return -EBADF; } - rb_erase(&node->rbnode, &fd->tid_rb_root); + fd->mmu_rb_remove(fd, &fd->tid_rb_root, node); spin_unlock(&fd->rb_lock); if (grp) *grp = node->grp; @@ -993,10 +1006,11 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, u16 rcventry = grp->base + i; struct mmu_rb_node *node; - node = mmu_rb_search_by_entry(root, rcventry); - if (!node) + node = fd->entry_to_rb[rcventry - + uctxt->expected_base]; + if (!node || node->rcventry != rcventry) continue; - rb_erase(&node->rbnode, root); + fd->mmu_rb_remove(fd, root, node); clear_tid_node(fd, -1, node); } } @@ -1034,7 +1048,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, spin_lock(&fd->rb_lock); while (addr < end) { - node = mmu_rb_search_by_addr(root, addr); + node = mmu_rb_search(root, addr); if (!node) { /* @@ -1116,8 +1130,8 @@ static inline int mmu_entry_cmp(struct mmu_rb_node *node, u32 entry) return 0; } -static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *root, - unsigned long addr) +static struct mmu_rb_node *mmu_rb_search(struct rb_root *root, + unsigned long addr) { struct rb_node *node = root->rb_node; @@ -1142,48 +1156,21 @@ static struct mmu_rb_node *mmu_rb_search_by_addr(struct rb_root *root, return NULL; } -static inline struct mmu_rb_node *mmu_rb_search_by_entry(struct rb_root *root, - u32 index) -{ - struct mmu_rb_node *rbnode; - struct 
rb_node *node; - - if (root && !RB_EMPTY_ROOT(root)) - for (node = rb_first(root); node; node = rb_next(node)) { - rbnode = rb_entry(node, struct mmu_rb_node, rbnode); - if (rbnode->rcventry == index) - return rbnode; - } - return NULL; -} - -static int mmu_rb_insert_by_entry(struct rb_root *root, +static int mmu_rb_insert_by_entry(struct hfi1_filedata *fdata, + struct rb_root *root, struct mmu_rb_node *node) { - struct rb_node **new = &root->rb_node, *parent = NULL; + u32 base = fdata->uctxt->expected_base; - while (*new) { - struct mmu_rb_node *this = - container_of(*new, struct mmu_rb_node, rbnode); - int result = mmu_entry_cmp(this, node->rcventry); - - parent = *new; - if (result < 0) - new = &((*new)->rb_left); - else if (result > 0) - new = &((*new)->rb_right); - else - return 1; - } - - rb_link_node(&node->rbnode, parent, new); - rb_insert_color(&node->rbnode, root); + fdata->entry_to_rb[node->rcventry - base] = node; return 0; } -static int mmu_rb_insert_by_addr(struct rb_root *root, struct mmu_rb_node *node) +static int mmu_rb_insert_by_addr(struct hfi1_filedata *fdata, + struct rb_root *root, struct mmu_rb_node *node) { struct rb_node **new = &root->rb_node, *parent = NULL; + u32 base = fdata->uctxt->expected_base; /* Figure out where to put new node */ while (*new) { @@ -1204,5 +1191,25 @@ static int mmu_rb_insert_by_addr(struct rb_root *root, struct mmu_rb_node *node) rb_link_node(&node->rbnode, parent, new); rb_insert_color(&node->rbnode, root); + fdata->entry_to_rb[node->rcventry - base] = node; return 0; } + +static void mmu_rb_remove_by_entry(struct hfi1_filedata *fdata, + struct rb_root *root, + struct mmu_rb_node *node) +{ + u32 base = fdata->uctxt->expected_base; + + fdata->entry_to_rb[node->rcventry - base] = NULL; +} + +static void mmu_rb_remove_by_addr(struct hfi1_filedata *fdata, + struct rb_root *root, + struct mmu_rb_node *node) +{ + u32 base = fdata->uctxt->expected_base; + + fdata->entry_to_rb[node->rcventry - base] = NULL; + rb_erase(&node->rbnode, root); +} -- cgit v0.10.2 From 82ab09e131ffb0497c9631e2c53b44fbf9ad5e1c Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:34:49 -0800 Subject: staging/rdma/hfi1: Reduce syslog message severity and provide speed information The syslog message causes unnecessary alarm for the single and dual port x8 cards by reporting at an error level. This patch reduces the severity to informational only and adds speed information. Reviewed-by: Dennis Dalessandro Signed-off-by: Easwar Hariharan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 76cf807..6605a6a 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -398,9 +398,7 @@ int pcie_speeds(struct hfi1_devdata *dd) /* obtain the link width and current speed */ update_lbus_info(dd); - /* check against expected pcie width and complain if "wrong" */ - if (dd->lbus_width < 16) - dd_dev_err(dd, "PCIe width %u (x16 HFI)\n", dd->lbus_width); + dd_dev_info(dd, "%s\n", dd->lbus_info); return 0; } -- cgit v0.10.2 From d24bc6481e376da3b7f226b57b39b0ae4088b8d9 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:34:58 -0800 Subject: staging/rdma/hfi1: Use device file minor to identify EPROM When writing to the EPROM, the driver will always use the "first" device. This is incorrect for multiple cards. Use the device file minor to determine the device to use. Reject the generic device file. 
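The mapping amounts to "unit = minor - 1", with minor 0 reserved for the generic node; condensed from the hunk below:

	int i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;

	if (i_minor <= 0)		/* the generic device file maps to no unit */
		return -EINVAL;
	dd = hfi1_lookup(i_minor - 1);	/* device file minor is unit number + 1 */
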
Reviewed-by: Mitko Haralanov Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c index fb620c9..8104a11 100644 --- a/drivers/staging/rdma/hfi1/eprom.c +++ b/drivers/staging/rdma/hfi1/eprom.c @@ -353,21 +353,26 @@ static inline u32 extract_rstart(u32 composite) * * Return 0 on success, -ERRNO on error */ -int handle_eprom_command(const struct hfi1_cmd *cmd) +int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) { struct hfi1_devdata *dd; u32 dev_id; u32 rlen; /* range length */ u32 rstart; /* range start */ + int i_minor; int ret = 0; /* - * The EPROM is per-device, so use unit 0 as that will always - * exist. + * Map the device file to device data using the relative minor. + * The device file minor number is the unit number + 1. 0 is + * the generic device file - reject it. */ - dd = hfi1_lookup(0); + i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; + if (i_minor <= 0) + return -EINVAL; + dd = hfi1_lookup(i_minor - 1); if (!dd) { - pr_err("%s: cannot find unit 0!\n", __func__); + pr_err("%s: cannot find unit %d!\n", __func__, i_minor); return -EINVAL; } diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/staging/rdma/hfi1/eprom.h index 64a6427..5a61ba3 100644 --- a/drivers/staging/rdma/hfi1/eprom.h +++ b/drivers/staging/rdma/hfi1/eprom.h @@ -52,4 +52,4 @@ struct hfi1_cmd; struct hfi1_devdata; int eprom_init(struct hfi1_devdata *dd); -int handle_eprom_command(const struct hfi1_cmd *cmd); +int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd); diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 2de9032..cc681f7 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -409,7 +409,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, case HFI1_CMD_EP_ERASE_RANGE: case HFI1_CMD_EP_READ_RANGE: case HFI1_CMD_EP_WRITE_RANGE: - ret = handle_eprom_command(&cmd); + ret = handle_eprom_command(fp, &cmd); break; } -- cgit v0.10.2 From 0f2d87d2827eb4f3c1319e69b67ba30d61cabe83 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:35:06 -0800 Subject: staging/rdma/hfi1: Improve performance of SDMA transfers Commit a0d406934a46 ("staging/rdma/hfi1: Add page lock limit check for SDMA requests") added a mechanism to delay the clean-up of user SDMA requests in order to facilitate proper locked page counting. This delayed processing was done using a kernel workqueue, which meant that a kernel thread would have to spin up and take CPU cycles to do the clean-up. This proved detrimental to performance because now there are two execution threads (the kernel workqueue and the user process) needing cycles on the same CPU. Performance-wise, it is much better to do as much of the clean-up as can be done in interrupt context (during the callback) and do the remaining work in-line during subsequent calls of the user process into the driver. 
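The split described above can be sketched as follows (names track the patch below; this is an outline, not the full implementation):

/* interrupt callback: defer only the work that must run in process context */
static void tx_complete_cb(struct user_sdma_iovec *iov,
			   struct hfi1_user_sdma_pkt_q *pq)
{
	spin_lock(&pq->iovec_lock);
	list_add_tail(&iov->list, &pq->iovec_list);	/* cheap and IRQ-safe */
	spin_unlock(&pq->iovec_lock);
}

/* next call from the user process: finish the deferred work inline */
static void drain_completed(struct hfi1_user_sdma_pkt_q *pq)
{
	struct user_sdma_iovec *iov;
	unsigned long flags;

	while (!list_empty(&pq->iovec_list)) {
		spin_lock_irqsave(&pq->iovec_lock, flags);
		iov = list_first_entry(&pq->iovec_list,
				       struct user_sdma_iovec, list);
		list_del_init(&iov->list);
		spin_unlock_irqrestore(&pq->iovec_lock, flags);
		unpin_vector_pages(iov);	/* unpinning now runs in the caller's context */
	}
}
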
The changes required to implement the above also significantly simplify the entire SDMA completion processing code and eliminate a memory corruption causing the following observed crash: [ 2881.703362] BUG: unable to handle kernel NULL pointer dereference at (null) [ 2881.703389] IP: [] user_sdma_send_pkts+0xcd4/0x18e0 [hfi1] [ 2881.703422] PGD 7d4d25067 PUD 77d96d067 PMD 0 [ 2881.703427] Oops: 0000 [#1] SMP [ 2881.703431] Modules linked in: [ 2881.703504] CPU: 28 PID: 6668 Comm: mpi_stress Tainted: G OENX 3.12.28-4-default #1 [ 2881.703508] Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.11.01.0044.090 [ 2881.703512] task: ffff88077da8e0c0 ti: ffff880856772000 task.ti: ffff880856772000 [ 2881.703515] RIP: 0010:[] [] user_sdma_send_pkts+0xcd4/0x [ 2881.703529] RSP: 0018:ffff880856773c48 EFLAGS: 00010287 [ 2881.703531] RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000002000 [ 2881.703534] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000002000 [ 2881.703537] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [ 2881.703540] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 2881.703543] R13: 0000000000000000 R14: ffff88071e782e68 R15: ffff8810532955c0 [ 2881.703546] FS: 00007f9c4375e700(0000) GS:ffff88107eec0000(0000) knlGS:0000000000000000 [ 2881.703549] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2881.703551] CR2: 0000000000000000 CR3: 00000007d4cba000 CR4: 00000000003407e0 [ 2881.703554] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2881.703556] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 2881.703558] Stack: [ 2881.703559] ffffffff00002000 ffff881000001800 ffffffff00000000 00000000000080d0 [ 2881.703570] 0000000000000000 0000200000000000 0000000000000000 ffff88071e782db8 [ 2881.703580] ffff8807d4d08d80 ffff881053295600 0000000000000008 ffff88071e782fc8 [ 2881.703589] Call Trace: [ 2881.703691] [] hfi1_user_sdma_process_request+0x84a/0xab0 [hfi1] [ 2881.703777] [] hfi1_aio_write+0xd2/0x110 [hfi1] [ 2881.703828] [] do_sync_readv_writev+0x48/0x80 [ 2881.703837] [] do_readv_writev+0xbb/0x230 [ 2881.703843] [] SyS_writev+0x48/0xc0 This commit also addresses issues related to notification of user processes of SDMA request slot availability. The slot should be cleaned up first before the user processes is notified of its availability. Reviewed-by: Arthur Kepner Reviewed-by: Dennis Dalessandro Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index d3de771..2d238f3 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -147,6 +147,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* Last packet in the request */ #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) + +/* Last packet that uses a particular io vector */ #define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0) #define SDMA_REQ_IN_USE 0 @@ -171,6 +173,7 @@ static unsigned initial_pkt_count = 8; #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ struct user_sdma_iovec { + struct list_head list; struct iovec iov; /* number of pages in this vector */ unsigned npages; @@ -214,15 +217,6 @@ struct user_sdma_request { */ u8 omfactor; /* - * pointer to the user's mm_struct. We are going to - * get a reference to it so it doesn't get freed - * since we might not be in process context when we - * are processing the iov's. 
- * Using this mm_struct, we can get vma based on the - * iov's address (find_vma()). - */ - struct mm_struct *user_mm; - /* * We copy the iovs for this request (based on * info.iovcnt). These are only the data vectors */ @@ -239,13 +233,13 @@ struct user_sdma_request { u16 tididx; u32 sent; u64 seqnum; + u64 seqcomp; struct list_head txps; spinlock_t txcmp_lock; /* protect txcmp list */ struct list_head txcmp; unsigned long flags; /* status of the last txreq completed */ int status; - struct work_struct worker; }; /* @@ -281,20 +275,20 @@ struct user_sdma_txreq { static int user_sdma_send_pkts(struct user_sdma_request *, unsigned); static int num_user_pages(const struct iovec *); static void user_sdma_txreq_cb(struct sdma_txreq *, int, int); -static void user_sdma_delayed_completion(struct work_struct *); -static void user_sdma_free_request(struct user_sdma_request *); +static inline void pq_update(struct hfi1_user_sdma_pkt_q *); +static void user_sdma_free_request(struct user_sdma_request *, bool); static int pin_vector_pages(struct user_sdma_request *, struct user_sdma_iovec *); -static void unpin_vector_pages(struct user_sdma_request *, - struct user_sdma_iovec *); +static void unpin_vector_pages(struct user_sdma_iovec *); static int check_header_template(struct user_sdma_request *, struct hfi1_pkt_header *, u32, u32); static int set_txreq_header(struct user_sdma_request *, struct user_sdma_txreq *, u32); static int set_txreq_header_ahg(struct user_sdma_request *, struct user_sdma_txreq *, u32); -static inline void set_comp_state(struct user_sdma_request *, - enum hfi1_sdma_comp_state, int); +static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *, + struct hfi1_user_sdma_comp_q *, + u16, enum hfi1_sdma_comp_state, int); static inline u32 set_pkt_bth_psn(__be32, u8, u32); static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len); @@ -381,17 +375,19 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) goto pq_nomem; memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size; - pq->reqs = kmalloc(memsize, GFP_KERNEL); + pq->reqs = kzalloc(memsize, GFP_KERNEL); if (!pq->reqs) goto pq_reqs_nomem; INIT_LIST_HEAD(&pq->list); + INIT_LIST_HEAD(&pq->iovec_list); pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; pq->n_max_reqs = hfi1_sdma_comp_ring_size; pq->state = SDMA_PKT_Q_INACTIVE; atomic_set(&pq->n_reqs, 0); + spin_lock_init(&pq->iovec_lock); init_waitqueue_head(&pq->wait); iowait_init(&pq->busy, 0, NULL, defer_packet_queue, @@ -447,6 +443,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; + struct user_sdma_iovec *iov; unsigned long flags; hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, @@ -462,6 +459,15 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) wait_event_interruptible( pq->wait, (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); + /* Unpin any left over buffers. 
*/ + while (!list_empty(&pq->iovec_list)) { + spin_lock_irqsave(&pq->iovec_lock, flags); + iov = list_first_entry(&pq->iovec_list, + struct user_sdma_iovec, list); + list_del_init(&iov->list); + spin_unlock_irqrestore(&pq->iovec_lock, flags); + unpin_vector_pages(iov); + } kfree(pq->reqs); kmem_cache_destroy(pq->txreq_cache); kfree(pq); @@ -479,16 +485,17 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, unsigned long dim, unsigned long *count) { - int ret = 0, i = 0, sent; + int ret = 0, i = 0; struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq = fd->pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; - unsigned long idx = 0; + unsigned long idx = 0, flags; u8 pcount = initial_pkt_count; struct sdma_req_info info; struct user_sdma_request *req; + struct user_sdma_iovec *ioptr; u8 opcode, sc, vl; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { @@ -505,9 +512,21 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, dd->unit, uctxt->ctxt, fd->subctxt, ret); return -EFAULT; } + + /* Process any completed vectors */ + while (!list_empty(&pq->iovec_list)) { + spin_lock_irqsave(&pq->iovec_lock, flags); + ioptr = list_first_entry(&pq->iovec_list, + struct user_sdma_iovec, list); + list_del_init(&ioptr->list); + spin_unlock_irqrestore(&pq->iovec_lock, flags); + unpin_vector_pages(ioptr); + } + trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, (u16 *)&info); - if (cq->comps[info.comp_idx].status == QUEUED) { + if (cq->comps[info.comp_idx].status == QUEUED || + test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) { hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); @@ -534,10 +553,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, req->cq = cq; req->status = -1; INIT_LIST_HEAD(&req->txps); - INIT_LIST_HEAD(&req->txcmp); - INIT_WORK(&req->worker, user_sdma_delayed_completion); - spin_lock_init(&req->txcmp_lock); memcpy(&req->info, &info, sizeof(info)); if (req_opcode(info.ctrl) == EXPECTED) @@ -606,6 +622,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, /* Save all the IO vector structures */ while (i < req->data_iovs) { + INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec)); req->iovs[i].offset = 0; req->data_len += req->iovs[i++].iov.iov_len; @@ -671,47 +688,52 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, } } - set_comp_state(req, QUEUED, 0); + set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); + atomic_inc(&pq->n_reqs); /* Send the first N packets in the request to buy us some time */ - sent = user_sdma_send_pkts(req, pcount); - if (unlikely(sent < 0)) { - if (sent != -EBUSY) { - req->status = sent; - set_comp_state(req, ERROR, req->status); - return sent; - } else - sent = 0; + ret = user_sdma_send_pkts(req, pcount); + if (unlikely(ret < 0 && ret != -EBUSY)) { + req->status = ret; + atomic_dec(&pq->n_reqs); + goto free_req; } - atomic_inc(&pq->n_reqs); - xchg(&pq->state, SDMA_PKT_Q_ACTIVE); - if (sent < req->info.npkts) { - /* - * This is a somewhat blocking send implementation. - * The driver will block the caller until all packets of the - * request have been submitted to the SDMA engine. However, it - * will not wait for send completions. 
- */ - while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { - ret = user_sdma_send_pkts(req, pcount); - if (ret < 0) { - if (ret != -EBUSY) { - req->status = ret; - return ret; - } - wait_event_interruptible_timeout( - pq->busy.wait_dma, - (pq->state == SDMA_PKT_Q_ACTIVE), - msecs_to_jiffies( - SDMA_IOWAIT_TIMEOUT)); + /* + * It is possible that the SDMA engine would have processed all the + * submitted packets by the time we get here. Therefore, only set + * packet queue state to ACTIVE if there are still uncompleted + * requests. + */ + if (atomic_read(&pq->n_reqs)) + xchg(&pq->state, SDMA_PKT_Q_ACTIVE); + + /* + * This is a somewhat blocking send implementation. + * The driver will block the caller until all packets of the + * request have been submitted to the SDMA engine. However, it + * will not wait for send completions. + */ + while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { + ret = user_sdma_send_pkts(req, pcount); + if (ret < 0) { + if (ret != -EBUSY) { + req->status = ret; + set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + return ret; } + wait_event_interruptible_timeout( + pq->busy.wait_dma, + (pq->state == SDMA_PKT_Q_ACTIVE), + msecs_to_jiffies( + SDMA_IOWAIT_TIMEOUT)); } } *count += idx; return 0; free_req: - user_sdma_free_request(req); + user_sdma_free_request(req, true); + set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); return ret; } @@ -937,16 +959,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) iovec->pages[pageidx], offset, len); if (ret) { - int i; - SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret); - /* Mark all assigned vectors as complete so they - * are unpinned in the callback. */ - for (i = tx->idx; i >= 0; i--) { - tx->iovecs[i].flags |= - TXREQ_FLAGS_IOVEC_LAST_PKT; - } goto free_txreq; } iov_offset += len; @@ -1043,12 +1057,6 @@ static int pin_vector_pages(struct user_sdma_request *req, return -ENOMEM; } - /* - * Get a reference to the process's mm so we can use it when - * unpinning the io vectors. - */ - req->pq->user_mm = get_task_mm(current); - pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base, npages, 0, iovec->pages); @@ -1058,34 +1066,20 @@ static int pin_vector_pages(struct user_sdma_request *req, iovec->npages = pinned; if (pinned != npages) { SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages); - unpin_vector_pages(req, iovec); + unpin_vector_pages(iovec); return -EFAULT; } + /* + * Get a reference to the process's mm so we can use it when + * unpinning the io vectors. + */ return 0; } -static void unpin_vector_pages(struct user_sdma_request *req, - struct user_sdma_iovec *iovec) +static void unpin_vector_pages(struct user_sdma_iovec *iovec) { - /* - * Unpinning is done through the workqueue so use the - * process's mm if we have a reference to it. - */ - if ((current->flags & PF_KTHREAD) && req->pq->user_mm) - use_mm(req->pq->user_mm); - hfi1_release_user_pages(iovec->pages, iovec->npages, 0); - /* - * Unuse the user's mm (see above) and release the - * reference to it. 
- */ - if (req->pq->user_mm) { - if (current->flags & PF_KTHREAD) - unuse_mm(req->pq->user_mm); - mmput(req->pq->user_mm); - } - kfree(iovec->pages); iovec->pages = NULL; iovec->npages = 0; @@ -1365,18 +1359,17 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status, struct user_sdma_txreq *tx = container_of(txreq, struct user_sdma_txreq, txreq); struct user_sdma_request *req; - bool defer; + struct hfi1_user_sdma_pkt_q *pq; + struct hfi1_user_sdma_comp_q *cq; + u16 idx; int i; if (!tx->req) return; req = tx->req; - /* - * If this is the callback for the last packet of the request, - * queue up the request for clean up. - */ - defer = (tx->seqnum == req->info.npkts - 1); + pq = req->pq; + cq = req->cq; /* * If we have any io vectors associated with this txreq, @@ -1385,87 +1378,52 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status, */ for (i = tx->idx; i >= 0; i--) { if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) { - defer = true; - break; + spin_lock(&pq->iovec_lock); + list_add_tail(&tx->iovecs[i].vec->list, + &pq->iovec_list); + spin_unlock(&pq->iovec_lock); } } - req->status = status; if (status != SDMA_TXREQ_S_OK) { SDMA_DBG(req, "SDMA completion with error %d", status); set_bit(SDMA_REQ_HAS_ERROR, &req->flags); - defer = true; } - /* - * Defer the clean up of the iovectors and the request until later - * so it can be done outside of interrupt context. - */ - if (defer) { - spin_lock(&req->txcmp_lock); - list_add_tail(&tx->list, &req->txcmp); - spin_unlock(&req->txcmp_lock); - schedule_work(&req->worker); + req->seqcomp = tx->seqnum; + kmem_cache_free(pq->txreq_cache, tx); + tx = NULL; + + idx = req->info.comp_idx; + if (req->status == -1 && status == SDMA_TXREQ_S_OK) { + if (req->seqcomp == req->info.npkts - 1) { + req->status = 0; + user_sdma_free_request(req, false); + pq_update(pq); + set_comp_state(pq, cq, idx, COMPLETE, 0); + } } else { - kmem_cache_free(req->pq->txreq_cache, tx); + if (status != SDMA_TXREQ_S_OK) + req->status = status; + if (req->seqcomp == ACCESS_ONCE(req->seqnum) && + test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) { + user_sdma_free_request(req, false); + pq_update(pq); + set_comp_state(pq, cq, idx, ERROR, req->status); + } } } -static void user_sdma_delayed_completion(struct work_struct *work) +static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) { - struct user_sdma_request *req = - container_of(work, struct user_sdma_request, worker); - struct hfi1_user_sdma_pkt_q *pq = req->pq; - struct user_sdma_txreq *tx = NULL; - unsigned long flags; - u64 seqnum; - int i; - - while (1) { - spin_lock_irqsave(&req->txcmp_lock, flags); - if (!list_empty(&req->txcmp)) { - tx = list_first_entry(&req->txcmp, - struct user_sdma_txreq, list); - list_del(&tx->list); - } - spin_unlock_irqrestore(&req->txcmp_lock, flags); - if (!tx) - break; - - for (i = tx->idx; i >= 0; i--) - if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) - unpin_vector_pages(req, tx->iovecs[i].vec); - - seqnum = tx->seqnum; - kmem_cache_free(pq->txreq_cache, tx); - tx = NULL; - - if (req->status != SDMA_TXREQ_S_OK) { - if (seqnum == ACCESS_ONCE(req->seqnum) && - test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) { - atomic_dec(&pq->n_reqs); - set_comp_state(req, ERROR, req->status); - user_sdma_free_request(req); - break; - } - } else { - if (seqnum == req->info.npkts - 1) { - atomic_dec(&pq->n_reqs); - set_comp_state(req, COMPLETE, 0); - user_sdma_free_request(req); - break; - } - } - } - - if (!atomic_read(&pq->n_reqs)) { + if 
(atomic_dec_and_test(&pq->n_reqs)) { xchg(&pq->state, SDMA_PKT_Q_INACTIVE); wake_up(&pq->wait); } } -static void user_sdma_free_request(struct user_sdma_request *req) +static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) { if (!list_empty(&req->txps)) { struct sdma_txreq *t, *p; @@ -1478,26 +1436,27 @@ static void user_sdma_free_request(struct user_sdma_request *req) kmem_cache_free(req->pq->txreq_cache, tx); } } - if (req->data_iovs) { + if (req->data_iovs && unpin) { int i; for (i = 0; i < req->data_iovs; i++) if (req->iovs[i].npages && req->iovs[i].pages) - unpin_vector_pages(req, &req->iovs[i]); + unpin_vector_pages(&req->iovs[i]); } kfree(req->tids); clear_bit(SDMA_REQ_IN_USE, &req->flags); } -static inline void set_comp_state(struct user_sdma_request *req, - enum hfi1_sdma_comp_state state, - int ret) +static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, + struct hfi1_user_sdma_comp_q *cq, + u16 idx, enum hfi1_sdma_comp_state state, + int ret) { - SDMA_DBG(req, "Setting completion status %u %d", state, ret); - req->cq->comps[req->info.comp_idx].status = state; + hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d", + pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret); + cq->comps[idx].status = state; if (state == ERROR) - req->cq->comps[req->info.comp_idx].errcode = -ret; - trace_hfi1_sdma_user_completion(req->pq->dd, req->pq->ctxt, - req->pq->subctxt, req->info.comp_idx, - state, ret); + cq->comps[idx].errcode = -ret; + trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt, + idx, state, ret); } diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h index 0afa285..317f0e8 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.h +++ b/drivers/staging/rdma/hfi1/user_sdma.h @@ -69,7 +69,8 @@ struct hfi1_user_sdma_pkt_q { struct iowait busy; unsigned state; wait_queue_head_t wait; - struct mm_struct *user_mm; + struct list_head iovec_list; + spinlock_t iovec_lock; /* protect iovec_list */ }; struct hfi1_user_sdma_comp_q { -- cgit v0.10.2 From ecd42f8df2b9a0a77f2638c7780cda96de2b489b Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:35:14 -0800 Subject: staging/rdma/hfi1: correctly check for post-interrupt packets At the end of the packet processing interrupt and thread handler, the RcvAvail interrupt is finally cleared down. There is a window between the last packet check (via DMA to memory) and interrupt clear-down. The code to recheck for a packet once the RcvAvail interrupt is enabled must ultimately use a CSR read of RcvHdrTail rather than depend on DMA'ed memory. This change adds a CSR read of RcvHdrTail if the memory check does not show a packet present. The memory check is retained as a quick test before doing the more expensive, but always correct, CSR read. In the ASIC, the CSR read used to force the RcvAvail clear-down write to complete may bypass queued DMA writes to memory. The only correct way to decide if a packet has arrived without an interrupt to push DMA to memory ahead of itself is to read the tail directly after RcvAvail has been cleared down. It is not sufficient to just read the tail and skip pushing the clear-down. Both must be done. The tail read will not push the clear-down write due to it being in a different area of the chip. At this point, it is OK to have packet data still being DMA'ed to memory. This is the end of packet processing for previous packets.
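For readability, the recheck described above reduces to a short sequence at the end of the receive interrupt/thread handler. A minimal sketch (illustrative only, not part of the patch; the wrapper name is invented, while clear_recv_intr(), check_packet_present() and force_recv_intr() are the driver routines touched below):

	static void recheck_for_packets(struct hfi1_ctxtdata *rcd)
	{
		clear_recv_intr(rcd);		/* clear down RcvAvail; does not push queued DMA */
		if (check_packet_present(rcd))	/* quick memory test, then authoritative CSR tail read */
			force_recv_intr(rcd);	/* the forced interrupt pushes pending DMA to memory */
	}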
If the driver detects a new packet has arrived before interrupts were re-enabled, it will force a new interrupt and the interrupt will push the packet DMAs to memory, where the driver will then react to the interrupt and do normal packet processing. Reviewed-by: Mike Marciniszyn Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 13b92a3..a67483e 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -8022,9 +8022,9 @@ static irqreturn_t sdma_interrupt(int irq, void *data) } /* - * Clear the receive interrupt, forcing the write and making sure - * we have data from the chip, pushing everything in front of it - * back to the host. + * Clear the receive interrupt. Use a read of the interrupt clear CSR + * to ensure that the write completed. This does NOT guarantee that + * queued DMA writes to memory from the chip are pushed. */ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd) { @@ -8043,15 +8043,33 @@ void force_recv_intr(struct hfi1_ctxtdata *rcd) { write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask); } -/* return non-zero if a packet is present */ +/* + * Return non-zero if a packet is present. + * + * This routine is called when rechecking for packets after the RcvAvail + * interrupt has been cleared down. First, do a quick check of memory for + * a packet present. If not found, use an expensive CSR read of the context + * tail to determine the actual tail. The CSR read is necessary because there + * is no method to push pending DMAs to memory other than an interrupt and we + * are trying to determine if we need to force an interrupt. + */ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) { + u32 tail; + int present; + if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) - return (rcd->seq_cnt == + present = (rcd->seq_cnt == rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)))); + else /* is RDMA rtail */ + present = (rcd->head != get_rcvhdrtail(rcd)); + + if (present) + return 1; - /* else is RDMA rtail */ - return (rcd->head != get_rcvhdrtail(rcd)); + /* fall back to a CSR read, correct independent of DMA_RTAIL */ + tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL); + return rcd->head != tail; } /* -- cgit v0.10.2 From c7cbf2fabbe6e7cbf4f82b6f79bc8e499761c4d2 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:35:23 -0800 Subject: staging/rdma/hfi1: Properly determine error status of SDMA slots To ensure correct operation between the driver and PSM with respect to managing the SDMA request ring, it is important that the status for a particular request slot is set at the correct time. Otherwise, PSM can get out of sync with the driver, which could lead to hangs or errors on new requests. Properly determining when to set the error status of an SDMA slot depends on knowing exactly when the last txreq for that request has been completed. This in turn requires that the driver knows exactly how many requests have been generated and how many of those requests have been successfully submitted to the SDMA queue. The previous implementation of the mid-layer SDMA API did not provide a way for the caller of sdma_send_txlist() to know how many of the txreqs in the input list have actually been submitted without traversing the list and counting. Since sdma_send_txlist() already traverses the list in order to process it, requiring such traversal in the caller is completely unnecessary.
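As the next paragraph concludes, the fix is to have sdma_send_txlist() return that count. For reference, the caller-side accounting this enables, condensed from the user_sdma.c hunk in this patch and shown here only as a reading aid, is:

	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
	if (list_empty(&req->txps)) {
		/* every txreq was accepted */
		req->seqsubmitted = req->seqnum;
	} else if (ret > 0) {
		/* partial submission: remember how many were queued */
		req->seqsubmitted += ret;
		ret = 0;
	}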
Therefore, it is much easier to enhance sdma_send_txlist() to return the number of successfully submitted txreqs. This, in turn, allows the caller to accurately determine the progress of the SDMA request and, therefore, correctly set the error status at the right time. Reviewed-by: Mike Marciniszyn Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index ddaaaac..579d821 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -2144,8 +2144,8 @@ nodesc: * side locking. * * Return: - * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring - * (wait == NULL) + * > 0 - Success (value is number of sdma_txreq's submitted), + * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state */ int sdma_send_txlist(struct sdma_engine *sde, @@ -2185,7 +2185,7 @@ update_tail: if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); - return ret; + return ret == 0 ? count : ret; unlock_noconn: spin_lock(&sde->flushlist_lock); list_for_each_entry_safe(tx, tx_next, tx_list, list) { diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 2d238f3..0c32eaf 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -234,6 +234,7 @@ struct user_sdma_request { u32 sent; u64 seqnum; u64 seqcomp; + u64 seqsubmitted; struct list_head txps; spinlock_t txcmp_lock; /* protect txcmp list */ struct list_head txcmp; @@ -1001,18 +1002,19 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) TXREQ_FLAGS_IOVEC_LAST_PKT; } + list_add_tail(&tx->txreq.list, &req->txps); /* * It is important to increment this here as it is used to * generate the BTH.PSN and, therefore, can't be bulk-updated * outside of the loop. 
*/ tx->seqnum = req->seqnum++; - list_add_tail(&tx->txreq.list, &req->txps); npkts++; } dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps); - if (list_empty(&req->txps)) + if (list_empty(&req->txps)) { + req->seqsubmitted = req->seqnum; if (req->seqnum == req->info.npkts) { set_bit(SDMA_REQ_SEND_DONE, &req->flags); /* @@ -1024,6 +1026,10 @@ dosend: if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) sdma_ahg_free(req->sde, req->ahg_idx); } + } else if (ret > 0) { + req->seqsubmitted += ret; + ret = 0; + } return ret; free_txreq: @@ -1406,8 +1412,9 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status, } else { if (status != SDMA_TXREQ_S_OK) req->status = status; - if (req->seqcomp == ACCESS_ONCE(req->seqnum) && - test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) { + if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && + (test_bit(SDMA_REQ_SEND_DONE, &req->flags) || + test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { user_sdma_free_request(req, false); pq_update(pq); set_comp_state(pq, cq, idx, ERROR, req->status); -- cgit v0.10.2 From f45c8dc8543783701fbad39a995e7a074a233b9d Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:35:31 -0800 Subject: staging/rdma/hfi1: Report physical state changes per device instead of globally Make physical state change reporting be per-device, not global to reduce excessive reports of "physical state changed" Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index a67483e..5b8fb02 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -12201,18 +12201,17 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) { - static u32 remembered_state = 0xff; u32 pstate; u32 ib_pstate; pstate = read_physical_state(ppd->dd); ib_pstate = chip_to_opa_pstate(ppd->dd, pstate); - if (remembered_state != ib_pstate) { + if (ppd->last_pstate != ib_pstate) { dd_dev_info(ppd->dd, "%s: physical state changed to %s (0x%x), phy 0x%x\n", __func__, opa_pstate_name(ib_pstate), ib_pstate, pstate); - remembered_state = ib_pstate; + ppd->last_pstate = ib_pstate; } return ib_pstate; } @@ -14019,6 +14018,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* start in offline */ ppd->host_link_state = HLS_DN_OFFLINE; init_vl_arb_caches(ppd); + ppd->last_pstate = 0xff; /* invalid value */ } dd->link_default = HLS_DN_POLL; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index f3c1e67..da42991 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -663,6 +663,7 @@ struct hfi1_pportdata { u8 link_enabled; /* link enabled? */ u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ + u8 last_pstate; /* info only */ /* placeholders for IB MAD packet settings */ u8 overrun_threshold; -- cgit v0.10.2 From 53f449e4bf04ac5dce6385a1546ab6108666def2 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:35:40 -0800 Subject: staging/rdma/hfi1: Fix fabric serdes reset by re-downloading firmware A host fabric serdes reset is required to go back to polling. However, access to the fabric serdes may have been invalidated by the sibling HFI when it downloads its fabric serdes firmware. Work around this by re-downloading and re-validating the serdes firmware at reset time on Bx hardware. 
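In outline, the new reset path looks as follows (a condensed sketch of the firmware.c hunk below, not additional code; ax_direct_sbus_reset() is an invented stand-in for the inline A0 SBus sequence kept for A-step parts):

	void fabric_serdes_reset(struct hfi1_devdata *dd)
	{
		if (!fw_fabric_serdes_load)
			return;
		if (is_ax(dd)) {
			/* A0 serdes do not work with a re-download */
			ax_direct_sbus_reset(dd);
			return;
		}
		/* Bx and later: turn the SPICOs off, then re-download and
		 * re-validate the fabric serdes firmware; the download
		 * resets the serdes as a by-product */
		acquire_hw_mutex(dd);
		set_sbus_fast_mode(dd);
		turn_off_spicos(dd, SPICO_FABRIC);
		(void)load_fabric_serdes_firmware(dd, &fw_fabric);
		clear_sbus_fast_mode(dd);
		release_hw_mutex(dd);
	}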
Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 4ba524b..0b23e3e 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -233,6 +233,8 @@ static const u8 all_pcie_serdes_broadcast = 0xe0; /* forwards */ static void dispose_one_firmware(struct firmware_details *fdet); +static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, + struct firmware_details *fdet); /* * Read a single 64-bit value from 8051 data memory. @@ -1092,27 +1094,56 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags) } /* - * Reset all of the fabric serdes for our HFI. + * Reset all of the fabric serdes for this HFI in preparation to take the + * link to Polling. + * + * To do a reset, we need to write to to the serdes registers. Unfortunately, + * the fabric serdes download to the other HFI on the ASIC will have turned + * off the firmware validation on this HFI. This means we can't write to the + * registers to reset the serdes. Work around this by performing a complete + * re-download and validation of the fabric serdes firmware. This, as a + * by-product, will reset the serdes. NOTE: the re-download requires that + * the 8051 be in the Offline state. I.e. not actively trying to use the + * serdes. This routine is called at the point where the link is Offline and + * is getting ready to go to Polling. */ void fabric_serdes_reset(struct hfi1_devdata *dd) { - u8 ra; - - if (dd->icode != ICODE_RTL_SILICON) /* only for RTL */ + if (!fw_fabric_serdes_load) return; - ra = fabric_serdes_broadcast[dd->hfi1_id]; + if (is_ax(dd)) { + /* A0 serdes do not work with a re-download */ + u8 ra = fabric_serdes_broadcast[dd->hfi1_id]; + + acquire_hw_mutex(dd); + set_sbus_fast_mode(dd); + /* place SerDes in reset and disable SPICO */ + sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011); + /* wait 100 refclk cycles @ 156.25MHz => 640ns */ + udelay(1); + /* remove SerDes reset */ + sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010); + /* turn SPICO enable on */ + sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002); + clear_sbus_fast_mode(dd); + release_hw_mutex(dd); + return; + } acquire_hw_mutex(dd); set_sbus_fast_mode(dd); - /* place SerDes in reset and disable SPICO */ - sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011); - /* wait 100 refclk cycles @ 156.25MHz => 640ns */ - udelay(1); - /* remove SerDes reset */ - sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010); - /* turn SPICO enable on */ - sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002); + + turn_off_spicos(dd, SPICO_FABRIC); + /* + * No need for firmware retry - what to download has already been + * decided. + * No need to pay attention to the load return - the only failure + * is a validation failure, which has already been checked by the + * initial download. + */ + (void)load_fabric_serdes_firmware(dd, &fw_fabric); + clear_sbus_fast_mode(dd); release_hw_mutex(dd); } -- cgit v0.10.2 From 7b0b01aa8f48cd237322cbffa05662a9c6b156f8 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:35:49 -0800 Subject: staging/rdma/hfi1: Split last 8 bytes of copy to user buffer Copy the last 8 bytes of user mode RC WRITE_ONLY and WRITE_LAST opcodes separately from the rest of the data. 
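Reduced to a standalone sketch, the copy scheme is simply: bulk-copy everything except the final 8 bytes, then move the tail one byte at a time so those bytes are observed in order. This is illustrative only; the real change threads a copy_last flag through hfi1_copy_sge(), as shown in the hunks below.

	/* assumes len and the buffers are valid; kernel-style C */
	static void copy_with_ordered_tail(u8 *dst, const u8 *src, size_t len)
	{
		size_t head = len > 8 ? len - 8 : 0;
		size_t i;

		memcpy(dst, src, head);		/* fast "rep movsb" style copy */
		for (i = head; i < len; i++)	/* in-order byte copy of the tail */
			dst[i] = src[i];
	}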
It is a de-facto standard for some MPI implementations to use a poll on the last few bytes of a verbs message to indicate that the message has been received rather than follow the required function method. The driver uses the kernel memcpy routine, which becomes "rep movsb" on modern machines. This copy, while very fast, does not guarantee in-order copy completion and the result is an occasional perceived corrupted packet. Avoid the issue by splitting the last 8 bytes to copy from the verbs opcodes where it matters and performing an in-order byte copy. Reviewed-by: Mike Marciniszyn Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 50559fd..371edc3 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -1539,7 +1539,7 @@ read_middle: qp->s_rdma_read_len -= pmtu; update_last_psn(qp, psn); spin_unlock_irqrestore(&qp->s_lock, flags); - hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0); + hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0); goto bail; case OP(RDMA_READ_RESPONSE_ONLY): @@ -1583,7 +1583,7 @@ read_last: if (unlikely(tlen != qp->s_rdma_read_len)) goto ack_len_err; aeth = be32_to_cpu(ohdr->u.aeth); - hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0); + hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0); WARN_ON(qp->s_rdma_read_sge.num_sge); (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST), 0, rcd); @@ -1977,6 +1977,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) unsigned long flags; u32 bth1; int ret, is_fecn = 0; + int copy_last = 0; bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) @@ -2081,7 +2082,7 @@ send_middle: qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) goto nack_inv; - hfi1_copy_sge(&qp->r_sge, data, pmtu, 1); + hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0); break; case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): @@ -2109,8 +2110,10 @@ send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; wc.wc_flags = IB_WC_WITH_IMM; goto send_last; - case OP(SEND_LAST): case OP(RDMA_WRITE_LAST): + copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user; + /* fall through */ + case OP(SEND_LAST): no_immediate_data: wc.wc_flags = 0; wc.ex.imm_data = 0; @@ -2126,7 +2129,7 @@ send_last: wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; - hfi1_copy_sge(&qp->r_sge, data, tlen, 1); + hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last); rvt_put_ss(&qp->r_sge); qp->r_msn++; if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) @@ -2163,8 +2166,10 @@ send_last: (bth0 & IB_BTH_SOLICITED) != 0); break; - case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_ONLY): + copy_last = 1; + /* fall through */ + case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) goto nack_inv; diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index f09badb..6aeea6c 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -370,6 +370,7 @@ static void ruc_loopback(struct rvt_qp *sqp) enum ib_wc_status send_status; int release; int ret; + int copy_last = 0; rcu_read_lock(); @@ -459,10 +460,13 @@ again: goto op_err; if (!ret) goto rnr_nak; - /* FALLTHROUGH */ + /* skip copy_last set and qp_access_flags recheck */ + goto do_write; case IB_WR_RDMA_WRITE: + copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user; if (unlikely(!(qp->qp_access_flags & 
IB_ACCESS_REMOTE_WRITE))) goto inv_err; +do_write: if (wqe->length == 0) if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, wqe->rdma_wr.remote_addr, @@ -526,7 +530,7 @@ again: if (len > sge->sge_length) len = sge->sge_length; WARN_ON_ONCE(len == 0); - hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release); + hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last); sge->vaddr += len; sge->length -= len; sge->sge_length -= len; diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 1e50d30..0aa604b 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -418,7 +418,7 @@ send_first: qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) goto rewind; - hfi1_copy_sge(&qp->r_sge, data, pmtu, 0); + hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0); break; case OP(SEND_LAST_WITH_IMMEDIATE): @@ -443,7 +443,7 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto rewind; wc.opcode = IB_WC_RECV; - hfi1_copy_sge(&qp->r_sge, data, tlen, 0); + hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0); rvt_put_ss(&qp->s_rdma_read_sge); last_imm: wc.wr_id = qp->r_wr_id; @@ -518,7 +518,7 @@ rdma_first: qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) goto drop; - hfi1_copy_sge(&qp->r_sge, data, pmtu, 1); + hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0); break; case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): @@ -547,7 +547,7 @@ rdma_last_imm: } wc.byte_len = qp->r_len; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; - hfi1_copy_sge(&qp->r_sge, data, tlen, 1); + hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0); rvt_put_ss(&qp->r_sge); goto last_imm; @@ -563,7 +563,7 @@ rdma_last: tlen -= (hdrsize + pad + 4); if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; - hfi1_copy_sge(&qp->r_sge, data, tlen, 1); + hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0); rvt_put_ss(&qp->r_sge); break; diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 2eae167..fdf6e3b 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -187,7 +187,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (ah_attr->ah_flags & IB_AH_GRH) { hfi1_copy_sge(&qp->r_sge, &ah_attr->grh, - sizeof(struct ib_grh), 1); + sizeof(struct ib_grh), 1, 0); wc.wc_flags |= IB_WC_GRH; } else hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); @@ -203,7 +203,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (len > sge->sge_length) len = sge->sge_length; WARN_ON_ONCE(len == 0); - hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1); + hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0); sge->vaddr += len; sge->length -= len; sge->sge_length -= len; @@ -836,11 +836,12 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } if (has_grh) { hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, - sizeof(struct ib_grh), 1); + sizeof(struct ib_grh), 1, 0); wc.wc_flags |= IB_WC_GRH; } else hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); - hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); + hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), + 1, 0); rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) return; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index d617324..8f351bc 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -242,14 +242,28 @@ __be64 ib_hfi1_sys_image_guid; * @ss: the SGE state * @data: the data to copy * @length: the length of the data + * @copy_last: do a separate copy of the 
last 8 bytes */ void hfi1_copy_sge( struct rvt_sge_state *ss, void *data, u32 length, - int release) + int release, + int copy_last) { struct rvt_sge *sge = &ss->sge; + int in_last = 0; + int i; + + if (copy_last) { + if (length > 8) { + length -= 8; + } else { + copy_last = 0; + in_last = 1; + } + } +again: while (length) { u32 len = sge->length; @@ -258,7 +272,13 @@ void hfi1_copy_sge( if (len > sge->sge_length) len = sge->sge_length; WARN_ON_ONCE(len == 0); - memcpy(sge->vaddr, data, len); + if (in_last) { + /* enforce byte transer ordering */ + for (i = 0; i < len; i++) + ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i]; + } else { + memcpy(sge->vaddr, data, len); + } sge->vaddr += len; sge->length -= len; sge->sge_length -= len; @@ -281,6 +301,13 @@ void hfi1_copy_sge( data += len; length -= len; } + + if (copy_last) { + copy_last = 0; + in_last = 1; + length = 8; + goto again; + } } /** diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index ac84dd7..afb2d7f 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -398,7 +398,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx); int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps); void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, - int release); + int release, int copy_last); void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release); -- cgit v0.10.2 From 91ab4ed334d0ea2f6c720ecb6204c3de350aaa08 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:35:57 -0800 Subject: staging/rdma/hfi1: Implement LED beaconing for maintenance This patch implements LED beaconing for maintenance. A MAD packet with the LEDInfo attribute set to 1 will enable LED beaconing with a duty cycle of 2s on and 1.5s off. A MAD packet with the LEDInfo attribute set to 0 will disable beaconing and return the LED to normal operation. Reviewed-by: Dennis Dalessandro Signed-off-by: Easwar Hariharan Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 59ce85f..5d012fe 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -1172,63 +1172,64 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc) return 0; } -/* - * Following deal with the "obviously simple" task of overriding the state - * of the LEDs, which normally indicate link physical and logical status. - * The complications arise in dealing with different hardware mappings - * and the board-dependent routine being called from interrupts. - * and then there's the requirement to _flash_ them. 
- */ -#define LED_OVER_FREQ_SHIFT 8 -#define LED_OVER_FREQ_MASK (0xFF<dd; + + if (atomic_read(&ppd->led_override_timer_active)) { + del_timer_sync(&ppd->led_override_timer); + atomic_set(&ppd->led_override_timer_active, 0); + } + + /* Shut off LEDs after we are sure timer is not running */ + setextled(dd, 0); +} static void run_led_override(unsigned long opaque) { struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque; struct hfi1_devdata *dd = ppd->dd; - int timeoff; - int ph_idx; + unsigned long timeout; + int phase_idx; if (!(dd->flags & HFI1_INITTED)) return; - ph_idx = ppd->led_override_phase++ & 1; - ppd->led_override = ppd->led_override_vals[ph_idx]; - timeoff = ppd->led_override_timeoff; + phase_idx = ppd->led_override_phase & 1; + setextled(dd, phase_idx); + + timeout = ppd->led_override_vals[phase_idx]; + /* Set up for next phase */ + ppd->led_override_phase = !ppd->led_override_phase; /* * don't re-fire the timer if user asked for it to be off; we let * it fire one more time after they turn it off to simplify */ if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) - mod_timer(&ppd->led_override_timer, jiffies + timeoff); + mod_timer(&ppd->led_override_timer, jiffies + timeout); + else + /* Hand control of the LED to the DC for normal operation */ + write_csr(dd, DCC_CFG_LED_CNTRL, 0); } -void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val) +/* + * To have the LED blink in a particular pattern, provide timeon and timeoff + * in milliseconds. To turn off custom blinking and return to normal operation, + * provide timeon = timeoff = 0. + */ +void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, + unsigned int timeoff) { struct hfi1_devdata *dd = ppd->dd; - int timeoff, freq; if (!(dd->flags & HFI1_INITTED)) return; - /* First check if we are blinking. If not, use 1HZ polling */ - timeoff = HZ; - freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; - - if (freq) { - /* For blink, set each phase from one nybble of val */ - ppd->led_override_vals[0] = val & 0xF; - ppd->led_override_vals[1] = (val >> 4) & 0xF; - timeoff = (HZ << 4)/freq; - } else { - /* Non-blink set both phases the same. */ - ppd->led_override_vals[0] = val & 0xF; - ppd->led_override_vals[1] = val & 0xF; - } - ppd->led_override_timeoff = timeoff; + /* Convert to jiffies for direct use in timer */ + ppd->led_override_vals[0] = msecs_to_jiffies(timeoff); + ppd->led_override_vals[1] = msecs_to_jiffies(timeon); + ppd->led_override_phase = 1; /* Arbitrarily start from LED on phase */ /* * If the timer has not already been started, do so. Use a "quick" @@ -1293,14 +1294,8 @@ int hfi1_reset_device(int unit) for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; - if (atomic_read(&ppd->led_override_timer_active)) { - /* Need to stop LED timer, _then_ shut off LEDs */ - del_timer_sync(&ppd->led_override_timer); - atomic_set(&ppd->led_override_timer_active, 0); - } - /* Shut off LEDs after we are sure timer is not running */ - ppd->led_override = LED_OVER_BOTH_OFF; + shutdown_led_override(ppd); } if (dd->flags & HFI1_HAS_SEND_DMA) sdma_exit(dd); diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index da42991..18508c9 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -7,7 +7,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -20,7 +20,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -669,14 +669,17 @@ struct hfi1_pportdata { u8 overrun_threshold; u8 phy_error_threshold; - /* used to override LED behavior */ - u8 led_override; /* Substituted for normal value, if non-zero */ - u16 led_override_timeoff; /* delta to next timer event */ - u8 led_override_vals[2]; /* Alternates per blink-frame */ - u8 led_override_phase; /* Just counts, LSB picks from vals[] */ + /* Used to override LED behavior for things like maintenance beaconing*/ + /* + * Alternates per phase of blink + * [0] holds LED off duration, [1] holds LED on duration + */ + unsigned long led_override_vals[2]; + u8 led_override_phase; /* LSB picks from vals[] */ atomic_t led_override_timer_active; /* Used to flash LEDs in override mode */ struct timer_list led_override_timer; + u32 sm_trap_qp; u32 sa_qp; @@ -1599,14 +1602,14 @@ void hfi1_free_devdata(struct hfi1_devdata *); void cc_state_reclaim(struct rcu_head *rcu); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); +void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, + unsigned int timeoff); /* - * Set LED override, only the two LSBs have "public" meaning, but - * any non-zero value substitutes them for the Link and LinkTrain - * LED states. + * Only to be used for driver unload or device reset where we cannot allow + * the timer to fire even the one extra time, else use hfi1_set_led_override + * with timeon = timeoff = 0 */ -#define HFI1_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ -#define HFI1_LED_LOG 2 /* Logical (link) YELLOW LED */ -void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val); +void shutdown_led_override(struct hfi1_pportdata *ppd); #define HFI1_CREDIT_RETURN_RATE (100) diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index eec9130..fe5e1e5 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -908,6 +908,8 @@ static void shutdown_device(struct hfi1_devdata *dd) /* disable the send device */ pio_send_control(dd, PSC_GLOBAL_DISABLE); + shutdown_led_override(ppd); + /* * Clear SerdesEnable. * We can't count on interrupts since we are stopping. diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 5146f5d..6976f93 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -3449,7 +3449,10 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - setextled(dd, on); + if (on) + hfi1_set_led_override(dd->pport, 2000, 1500); + else + hfi1_set_led_override(dd->pport, 0, 0); return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len); } -- cgit v0.10.2 From 0096765be01926e7efcc22032032347448743de5 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:36:06 -0800 Subject: staging/rdma/hfi1: Remove PCIe AER diagnostic message There are several reasons why PCIE AER cannot be enabled. Do not report the failure to enable as an error. Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 6605a6a..3d0c2e2 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -132,13 +132,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) } pci_set_master(pdev); - ret = pci_enable_pcie_error_reporting(pdev); - if (ret) { - hfi1_early_err(&pdev->dev, - "Unable to enable pcie error reporting: %d\n", - ret); - ret = 0; - } + (void)pci_enable_pcie_error_reporting(pdev); goto done; bail: -- cgit v0.10.2 From cfe3e656d8cd5ff03b8f0ce24f920f306313b013 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:36:14 -0800 Subject: staging/rdma/hfi1: Correct TWSI reset Change the TWSI reset function so it will stop the reset once the lines are in an expected state. Reviewed-by: Easwar Hariharan Reviewed-by: Dean Luick Signed-off-by: Pablo Cacho Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index 0e1a492..c9d1e64 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -106,7 +106,6 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, "I2C write interface reset failed\n"); - ret = -EIO; goto done; } @@ -179,7 +178,6 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, "I2C read interface reset failed\n"); - ret = -EIO; goto done; } @@ -213,7 +211,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, hfi1_dev_porterr(ppd->dd, ppd->port, "QSFP write interface reset failed\n"); mutex_unlock(&ppd->dd->qsfp_i2c_mutex); - return -EIO; + return ret; } while (count < len) { @@ -279,7 +277,7 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, hfi1_dev_porterr(ppd->dd, ppd->port, "QSFP read interface reset failed\n"); mutex_unlock(&ppd->dd->qsfp_i2c_mutex); - return -EIO; + return ret; } while (count < len) { diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c index 7c579b3..d7dfdd2 100644 --- a/drivers/staging/rdma/hfi1/twsi.c +++ b/drivers/staging/rdma/hfi1/twsi.c @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -136,6 +136,19 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit) i2c_wait_for_writes(dd, target); } +static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait) +{ + u32 read_val, mask; + + mask = QSFP_HFI0_I2CCLK; + /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ + hfi1_gpio_mod(dd, target, 0, 0, mask); + read_val = hfi1_gpio_mod(dd, target, 0, 0, 0); + if (wait) + i2c_wait_for_writes(dd, target); + return (read_val & mask) >> GPIO_SCL_NUM; +} + static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit) { u32 mask; @@ -274,13 +287,12 @@ static void stop_cmd(struct hfi1_devdata *dd, u32 target) /** * hfi1_twsi_reset - reset I2C communication * @dd: the hfi1_ib device + * returns 0 if ok, -EIO on error */ - int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target) { int clock_cycles_left = 9; - int was_high = 0; - u32 pins, mask; + u32 mask; /* Both SCL and SDA should be high. If not, there * is something wrong. @@ -294,43 +306,23 @@ int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target) */ hfi1_gpio_mod(dd, target, 0, 0, mask); - /* - * Clock nine times to get all listeners into a sane state. - * If SDA does not go high at any point, we are wedged. - * One vendor recommends then issuing START followed by STOP. - * we cannot use our "normal" functions to do that, because - * if SCL drops between them, another vendor's part will - * wedge, dropping SDA and keeping it low forever, at the end of - * the next transaction (even if it was not the device addressed). - * So our START and STOP take place with SCL held high. + /* Check if SCL is low, if it is low then we have a slave device + * misbehaving and there is not much we can do. 
+ */ + if (!scl_in(dd, target, 0)) + return -EIO; + + /* Check if SDA is low, if it is low then we have to clock SDA + * up to 9 times for the device to release the bus */ while (clock_cycles_left--) { + if (sda_in(dd, target, 0)) + return 0; scl_out(dd, target, 0); scl_out(dd, target, 1); - /* Note if SDA is high, but keep clocking to sync slave */ - was_high |= sda_in(dd, target, 0); - } - - if (was_high) { - /* - * We saw a high, which we hope means the slave is sync'd. - * Issue START, STOP, pause for T_BUF. - */ - - pins = hfi1_gpio_mod(dd, target, 0, 0, 0); - if ((pins & mask) != mask) - dd_dev_err(dd, "GPIO pins not at rest: %d\n", - pins & mask); - /* Drop SDA to issue START */ - udelay(1); /* Guarantee .6 uSec setup */ - sda_out(dd, target, 0); - udelay(1); /* Guarantee .6 uSec hold */ - /* At this point, SCL is high, SDA low. Raise SDA for STOP */ - sda_out(dd, target, 1); - udelay(TWSI_BUF_WAIT_USEC); } - return !was_high; + return -EIO; } #define HFI1_TWSI_START 0x100 diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/staging/rdma/hfi1/twsi.h index 5907e02..6cb30e5 100644 --- a/drivers/staging/rdma/hfi1/twsi.h +++ b/drivers/staging/rdma/hfi1/twsi.h @@ -7,7 +7,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -20,7 +20,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -54,8 +54,9 @@ struct hfi1_devdata; -/* Bit position of SDA pin in ASIC_QSFP* registers */ +/* Bit position of SDA/SCL pins in ASIC_QSFP* registers */ #define GPIO_SDA_NUM 1 +#define GPIO_SCL_NUM 0 /* these functions must be called with qsfp_lock held */ int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target); -- cgit v0.10.2 From 2ef907b80d1cc289a4352287bbb9fc5a19eed212 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:36:22 -0800 Subject: staging/rdma/hfi1: Fix snoop packet length calculation The LRH has a 12 bit packet length field, not 11 bit. This caused a snoop packet length miscalculation leading to a crash when sending a large ping over IPoIB while running opapacketcapture. Reviewed-by: Mike Marciniszyn Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index d9889d4..fafb3d7 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -85,10 +85,9 @@ static u8 snoop_flags; /* * Extract packet length from LRH header. - * Why & 0x7FF? Because len is only 11 bits in case it wasn't 0'd we throw the - * bogus bits away. 
This is in Dwords so multiply by 4 to get size in bytes + * This is in Dwords so multiply by 4 to get size in bytes */ -#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2) +#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2) enum hfi1_filter_status { HFI1_FILTER_HIT, -- cgit v0.10.2 From d05de3413da29d635ccaff528af6e37dd932b393 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 3 Feb 2016 14:36:31 -0800 Subject: staging/rdma/hfi1: Clean up init_cntrs() Clean up init_cntrs() by removing unnecessary memsets and debug statements Suggested-by: Dan Carpenter Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 5b8fb02..8c06e3b 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -11813,17 +11813,14 @@ static int init_cntrs(struct hfi1_devdata *dd) sz = 0; for (i = 0; i < DEV_CNTR_LAST; i++) { - hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name); if (dev_cntrs[i].flags & CNTR_DISABLED) { hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name); continue; } if (dev_cntrs[i].flags & CNTR_VL) { - hfi1_dbg_early("\tProcessing VL cntr\n"); dev_cntrs[i].offset = dd->ndevcntrs; for (j = 0; j < C_VL_COUNT; j++) { - memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, vl_from_idx(j)); @@ -11832,16 +11829,11 @@ static int init_cntrs(struct hfi1_devdata *dd) if (dev_cntrs[i].flags & CNTR_32BIT) sz += bit_type_32_sz; sz++; - hfi1_dbg_early("\t\t%s\n", name); dd->ndevcntrs++; } } else if (dev_cntrs[i].flags & CNTR_SDMA) { - hfi1_dbg_early( - "\tProcessing per SDE counters chip enginers %u\n", - dd->chip_sdma_engines); dev_cntrs[i].offset = dd->ndevcntrs; for (j = 0; j < dd->chip_sdma_engines; j++) { - memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); sz += strlen(name); @@ -11849,7 +11841,6 @@ static int init_cntrs(struct hfi1_devdata *dd) if (dev_cntrs[i].flags & CNTR_32BIT) sz += bit_type_32_sz; sz++; - hfi1_dbg_early("\t\t%s\n", name); dd->ndevcntrs++; } } else { @@ -11860,7 +11851,6 @@ static int init_cntrs(struct hfi1_devdata *dd) sz += bit_type_32_sz; dev_cntrs[i].offset = dd->ndevcntrs; dd->ndevcntrs++; - hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name); } } @@ -11886,7 +11876,6 @@ static int init_cntrs(struct hfi1_devdata *dd) /* Nothing */ } else if (dev_cntrs[i].flags & CNTR_VL) { for (j = 0; j < C_VL_COUNT; j++) { - memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, vl_from_idx(j)); @@ -11903,7 +11892,6 @@ static int init_cntrs(struct hfi1_devdata *dd) } } else if (dev_cntrs[i].flags & CNTR_SDMA) { for (j = 0; j < dd->chip_sdma_engines; j++) { - memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); memcpy(p, name, strlen(name)); @@ -11950,17 +11938,14 @@ static int init_cntrs(struct hfi1_devdata *dd) sz = 0; dd->nportcntrs = 0; for (i = 0; i < PORT_CNTR_LAST; i++) { - hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name); if (port_cntrs[i].flags & CNTR_DISABLED) { hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name); continue; } if (port_cntrs[i].flags & CNTR_VL) { - hfi1_dbg_early("\tProcessing VL cntr\n"); port_cntrs[i].offset = dd->nportcntrs; for (j = 0; j < C_VL_COUNT; j++) { - memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", port_cntrs[i].name, vl_from_idx(j)); @@ -11969,7 +11954,6 @@ static int 
init_cntrs(struct hfi1_devdata *dd) if (port_cntrs[i].flags & CNTR_32BIT) sz += bit_type_32_sz; sz++; - hfi1_dbg_early("\t\t%s\n", name); dd->nportcntrs++; } } else { @@ -11980,7 +11964,6 @@ static int init_cntrs(struct hfi1_devdata *dd) sz += bit_type_32_sz; port_cntrs[i].offset = dd->nportcntrs; dd->nportcntrs++; - hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name); } } @@ -11997,7 +11980,6 @@ static int init_cntrs(struct hfi1_devdata *dd) if (port_cntrs[i].flags & CNTR_VL) { for (j = 0; j < C_VL_COUNT; j++) { - memset(name, '\0', C_MAX_NAME); snprintf(name, C_MAX_NAME, "%s%d", port_cntrs[i].name, vl_from_idx(j)); -- cgit v0.10.2 From 251314635ad5043e9438a18b2de17ddf86309641 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:36:40 -0800 Subject: staging/rdma/hfi1: Support query gid in rdmavt Query gid is in rdmavt, but still relies on the driver to maintain the guid table. Add the necessary driver call back and remove the existing verb handler. Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 8f351bc..a85fd81 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1345,28 +1345,20 @@ static int modify_port(struct ib_device *ibdev, u8 port, return ret; } -static int query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) +static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, + int guid_index, __be64 *guid) { - struct hfi1_devdata *dd = dd_from_ibdev(ibdev); - int ret = 0; + struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - if (!port || port > dd->num_pports) - ret = -EINVAL; - else { - struct hfi1_ibport *ibp = to_iport(ibdev, port); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - - gid->global.subnet_prefix = ibp->rvp.gid_prefix; - if (index == 0) - gid->global.interface_id = cpu_to_be64(ppd->guid); - else if (index < HFI1_GUIDS_PER_PORT) - gid->global.interface_id = ibp->guids[index - 1]; - else - ret = -EINVAL; - } + if (guid_index == 0) + *guid = cpu_to_be64(ppd->guid); + else if (guid_index < HFI1_GUIDS_PER_PORT) + *guid = ibp->guids[guid_index - 1]; + else + return -EINVAL; - return ret; + return 0; } /* @@ -1538,7 +1530,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_device = modify_device; ibdev->query_port = query_port; ibdev->modify_port = modify_port; - ibdev->query_gid = query_gid; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; @@ -1555,6 +1546,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; + dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be; /* * Fill in rvt info device attributes. 
*/ -- cgit v0.10.2 From 45b59eefcca95a3dc75b68e063390f7a1aedd90b Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 3 Feb 2016 14:36:49 -0800 Subject: staging/rdma/hfi1: Remove modify_port and port_immutable functions Delete code from query_port which has been moved into rvt_query_port Create a call back function to shut down a port which may be called from rvt_modify_port Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 18508c9..363e6ef 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1536,6 +1536,11 @@ static inline struct hfi1_pportdata *ppd_from_ibp(struct hfi1_ibport *ibp) return container_of(ibp, struct hfi1_pportdata, ibport_data); } +static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi) +{ + return container_of(rdi, struct hfi1_ibdev, rdi); +} + static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 6976f93..3df1c8e 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -234,9 +234,12 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, /* * Send a Port Capability Mask Changed trap (ch. 14.3.11). */ -void hfi1_cap_mask_chg(struct hfi1_ibport *ibp) +void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) { struct opa_mad_notice_attr data; + struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); + struct hfi1_devdata *dd = dd_from_dev(verbs_dev); + struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data; u32 lid = ppd_from_ibp(ibp)->lid; memset(&data, 0, sizeof(data)); diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index a85fd81..0ee6b1d 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1220,33 +1220,24 @@ static inline u16 opa_width_to_ib(u16 in) } } -static int query_port(struct ib_device *ibdev, u8 port, +static int query_port(struct rvt_dev_info *rdi, u8 port_num, struct ib_port_attr *props) { - struct hfi1_devdata *dd = dd_from_ibdev(ibdev); - struct hfi1_ibport *ibp = to_iport(ibdev, port); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); + struct hfi1_devdata *dd = dd_from_dev(verbs_dev); + struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; u16 lid = ppd->lid; - memset(props, 0, sizeof(*props)); props->lid = lid ? 
lid : 0; props->lmc = ppd->lmc; - props->sm_lid = ibp->rvp.sm_lid; - props->sm_sl = ibp->rvp.sm_sl; /* OPA logical states match IB logical states */ props->state = driver_lstate(ppd); props->phys_state = hfi1_ibphys_portstate(ppd); - props->port_cap_flags = ibp->rvp.port_cap_flags; props->gid_tbl_len = HFI1_GUIDS_PER_PORT; - props->max_msg_sz = 0x80000000; - props->pkey_tbl_len = hfi1_get_npkeys(dd); - props->bad_pkey_cntr = ibp->rvp.pkey_violations; - props->qkey_viol_cntr = ibp->rvp.qkey_violations; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); props->max_vl_num = ppd->vls_supported; - props->init_type_reply = 0; /* Once we are a "first class" citizen and have added the OPA MTUs to * the core we can advertise the larger MTU enum to the ULPs, for now @@ -1260,27 +1251,6 @@ static int query_port(struct ib_device *ibdev, u8 port, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_2048); - props->subnet_timeout = ibp->rvp.subnet_timeout; - - return 0; -} - -static int port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - err = query_port(ibdev, port_num, &attr); - if (err) - return err; - - memset(immutable, 0, sizeof(*immutable)); - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_INTEL_OPA; - immutable->max_mad_size = OPA_MGMT_MAD_SIZE; return 0; } @@ -1324,24 +1294,16 @@ bail: return ret; } -static int modify_port(struct ib_device *ibdev, u8 port, - int port_modify_mask, struct ib_port_modify *props) +static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num) { - struct hfi1_ibport *ibp = to_iport(ibdev, port); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - int ret = 0; + struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); + struct hfi1_devdata *dd = dd_from_dev(verbs_dev); + struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; + int ret; - ibp->rvp.port_cap_flags |= props->set_port_cap_mask; - ibp->rvp.port_cap_flags &= ~props->clr_port_cap_mask; - if (props->set_port_cap_mask || props->clr_port_cap_mask) - hfi1_cap_mask_chg(ibp); - if (port_modify_mask & IB_PORT_SHUTDOWN) { - set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, - OPA_LINKDOWN_REASON_UNKNOWN); - ret = set_link_state(ppd, HLS_DN_DOWNDEF); - } - if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) - ibp->rvp.qkey_violations = 0; + set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, + OPA_LINKDOWN_REASON_UNKNOWN); + ret = set_link_state(ppd, HLS_DN_DOWNDEF); return ret; } @@ -1528,12 +1490,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->phys_port_cnt = dd->num_pports; ibdev->dma_device = &dd->pcidev->dev; ibdev->modify_device = modify_device; - ibdev->query_port = query_port; - ibdev->modify_port = modify_port; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; - ibdev->get_port_immutable = port_immutable; strncpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); @@ -1547,6 +1506,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be; + dd->verbs_dev.rdi.driver_f.query_port_state = query_port; + 
dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port; + dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg; /* * Fill in rvt info device attributes. */ @@ -1564,6 +1526,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK; + dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA; + dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; + dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index afb2d7f..a157e64 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -327,7 +327,7 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) */ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, u32 qp1, u32 qp2, u16 lid1, u16 lid2); -void hfi1_cap_mask_chg(struct hfi1_ibport *ibp); +void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, -- cgit v0.10.2 From e1bf0d5ecdc49cd4e2014da0d60efa74f5714fba Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:36:58 -0800 Subject: staging/rdma/hfi1, IB/core: Fix LinkDownReason define for consistency LinkDownReason LocalMediaNotInstalled lacked an underscore and was inconsistent with other defines in the same family. This patch fixes this. Reviewed-by: Ira Weiny Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 8c06e3b..f31cc23 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -5950,12 +5950,12 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) if ((ppd->offline_disabled_reason > HFI1_ODR_MASK( - OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED)) || + OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED)) || (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))) ppd->offline_disabled_reason = HFI1_ODR_MASK( - OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED); + OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED); if (ppd->host_link_state == HLS_DN_POLL) { /* diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index c3df1d8..506a827 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -816,7 +816,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) } else ppd->offline_disabled_reason = HFI1_ODR_MASK( - OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED); + OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED); break; default: dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index a0fa975..2b95c2c 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -97,7 +97,7 @@ #define OPA_LINKDOWN_REASON_WIDTH_POLICY 41 /* 42-48 reserved */ #define OPA_LINKDOWN_REASON_DISCONNECTED 49 -#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED 50 +#define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED 50 #define OPA_LINKDOWN_REASON_NOT_INSTALLED 51 #define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52 /* 53 reserved */ -- 
cgit v0.10.2 From 0840aea98cdf9024aff7f69e1167c4648665d48b Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:37:06 -0800 Subject: staging/rdma/hfi1: Improve performance of user SDMA To facilitate locked page counting, the user SDMA routines would maintain a list of io vectors, which were freed in the completion callback and then unpin the associated pages during the next call into the kernel. Since the size of this list was unbounded, doing this was bad for performance because the driver ended up spending too much time freeing the io vectors. This commit changes how the io vector freeing is done by moving the actual page unpinning in the callback and maintaining a count of unpinned pages. This count can then be used during the next call into the kernel to update the mm->pinned_vm variable (since that requires process context and the ability to sleep.) Reviewed-by: Mike Marciniszyn Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 0c32eaf..55c7e6a 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -236,8 +236,6 @@ struct user_sdma_request { u64 seqcomp; u64 seqsubmitted; struct list_head txps; - spinlock_t txcmp_lock; /* protect txcmp list */ - struct list_head txcmp; unsigned long flags; /* status of the last txreq completed */ int status; @@ -381,14 +379,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) goto pq_reqs_nomem; INIT_LIST_HEAD(&pq->list); - INIT_LIST_HEAD(&pq->iovec_list); pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; pq->n_max_reqs = hfi1_sdma_comp_ring_size; pq->state = SDMA_PKT_Q_INACTIVE; atomic_set(&pq->n_reqs, 0); - spin_lock_init(&pq->iovec_lock); init_waitqueue_head(&pq->wait); iowait_init(&pq->busy, 0, NULL, defer_packet_queue, @@ -444,7 +440,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; - struct user_sdma_iovec *iov; unsigned long flags; hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, @@ -460,15 +455,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) wait_event_interruptible( pq->wait, (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); - /* Unpin any left over buffers. 
*/ - while (!list_empty(&pq->iovec_list)) { - spin_lock_irqsave(&pq->iovec_lock, flags); - iov = list_first_entry(&pq->iovec_list, - struct user_sdma_iovec, list); - list_del_init(&iov->list); - spin_unlock_irqrestore(&pq->iovec_lock, flags); - unpin_vector_pages(iov); - } kfree(pq->reqs); kmem_cache_destroy(pq->txreq_cache); kfree(pq); @@ -492,11 +478,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, struct hfi1_user_sdma_pkt_q *pq = fd->pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; - unsigned long idx = 0, flags; + unsigned long idx = 0, unpinned; u8 pcount = initial_pkt_count; struct sdma_req_info info; struct user_sdma_request *req; - struct user_sdma_iovec *ioptr; u8 opcode, sc, vl; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { @@ -515,13 +500,11 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, } /* Process any completed vectors */ - while (!list_empty(&pq->iovec_list)) { - spin_lock_irqsave(&pq->iovec_lock, flags); - ioptr = list_first_entry(&pq->iovec_list, - struct user_sdma_iovec, list); - list_del_init(&ioptr->list); - spin_unlock_irqrestore(&pq->iovec_lock, flags); - unpin_vector_pages(ioptr); + unpinned = xchg(&pq->unpinned, 0); + if (unpinned) { + down_write(¤t->mm->mmap_sem); + current->mm->pinned_vm -= unpinned; + up_write(¤t->mm->mmap_sem); } trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, @@ -1075,10 +1058,6 @@ static int pin_vector_pages(struct user_sdma_request *req, unpin_vector_pages(iovec); return -EFAULT; } - /* - * Get a reference to the process's mm so we can use it when - * unpinning the io vectors. - */ return 0; } @@ -1368,7 +1347,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status, struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq; u16 idx; - int i; + int i, j; if (!tx->req) return; @@ -1379,15 +1358,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status, /* * If we have any io vectors associated with this txreq, - * check whether they need to be 'freed'. We can't free them - * here because the unpin function needs to be able to sleep. + * check whether they need to be 'freed'. */ for (i = tx->idx; i >= 0; i--) { if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) { - spin_lock(&pq->iovec_lock); - list_add_tail(&tx->iovecs[i].vec->list, - &pq->iovec_list); - spin_unlock(&pq->iovec_lock); + struct user_sdma_iovec *vec = + tx->iovecs[i].vec; + + for (j = 0; j < vec->npages; j++) + put_page(vec->pages[j]); + xadd(&pq->unpinned, vec->npages); + kfree(vec->pages); + vec->pages = NULL; + vec->npages = 0; } } diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h index 317f0e8..7ef31a6 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.h +++ b/drivers/staging/rdma/hfi1/user_sdma.h @@ -5,7 +5,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,7 +18,7 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015, 2016 Intel Corporation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -69,8 +69,7 @@ struct hfi1_user_sdma_pkt_q { struct iowait busy; unsigned state; wait_queue_head_t wait; - struct list_head iovec_list; - spinlock_t iovec_lock; /* protect iovec_list */ + unsigned long unpinned; }; struct hfi1_user_sdma_comp_q { -- cgit v0.10.2 From 3fafebb6f6c7084c899924b51c0716a778915c3b Mon Sep 17 00:00:00 2001 From: Sadanand Warrier Date: Wed, 3 Feb 2016 14:37:15 -0800 Subject: staging/rdma/hfi1: Add credits for VL0 to VL7 in snoop mode Add a new option to the snoop ioctl which allows credits to be allocated across all VLs. Previously only VL0 and VL15 had credits allocated. The new option used in the ioctl HFI1_SNOOP_IOCSET_OPTS allows credits to be allocated so that VL15 will have at least 8.5KB credits and the other VLs will have the rest of the credits divided equally across themselves. The total number of credits are stored in the upper 16 bits of the integer passed and the cumulative value should ensure that VL0 has at least 8.5KB and each VL a minimum of 2KB + 128 bytes Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Sadanand Warrier Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index f31cc23..77b07c3 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -10711,8 +10711,7 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask, * raise = if the new limit is higher than the current value (may be changed * earlier in the algorithm), set the new limit to the new value */ -static int set_buffer_control(struct hfi1_devdata *dd, - struct buffer_control *new_bc) +int set_buffer_control(struct hfi1_devdata *dd, struct buffer_control *new_bc) { u64 changing_mask, ld_mask, stat_mask; int change_count; diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index fafb3d7..bfce812 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -80,6 +80,7 @@ /* Snoop option mask */ #define SNOOP_DROP_SEND BIT(0) #define SNOOP_USE_METADATA BIT(1) +#define SNOOP_SET_VL0TOVL15 BIT(2) static u8 snoop_flags; @@ -965,6 +966,65 @@ static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, return ret; } +/** + * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others + * @ppd : ptr to hfi1 port data + * @value : options from user space + * + * Assumes the rest of the CM credit registers are zero from a + * previous global or credit reset. + * Leave shared count at zero for both global and all vls. + * In snoop mode ideally we don't use shared credits + * Reserve 8.5k for VL15 + * If total credits less than 8.5kbytes return error. + * Divide the rest of the credits across VL0 to VL7 and if + * each of these levels has less than 34 credits (at least 2048 + 128 bytes) + * return with an error. + * The credit registers will be reset to zero on link negotiation or link up + * so this function should be activated from user space only if the port has + * gone past link negotiation and link up. 
+ * + * Return -- 0 if successful else error condition + * + */ +static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd, + int value) +{ +#define OPA_MIN_PER_VL_CREDITS 34 /* 2048 + 128 bytes */ + struct buffer_control t; + int i; + struct hfi1_devdata *dd = ppd->dd; + u16 total_credits = (value >> 16) & 0xffff; + u16 vl15_credits = dd->vl15_init / 2; + u16 per_vl_credits; + __be16 be_per_vl_credits; + + if (!(ppd->host_link_state & HLS_UP)) + goto err_exit; + if (total_credits < vl15_credits) + goto err_exit; + + per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL; + + if (per_vl_credits < OPA_MIN_PER_VL_CREDITS) + goto err_exit; + + memset(&t, 0, sizeof(t)); + be_per_vl_credits = cpu_to_be16(per_vl_credits); + + for (i = 0; i < TXE_NUM_DATA_VL; i++) + t.vl[i].dedicated = be_per_vl_credits; + + t.vl[15].dedicated = cpu_to_be16(vl15_credits); + return set_buffer_control(ppd->dd, &t); + +err_exit: + snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d", + ppd->host_link_state, total_credits, vl15_credits); + + return -EINVAL; +} + static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { struct hfi1_devdata *dd; @@ -1191,6 +1251,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) snoop_flags |= SNOOP_DROP_SEND; if (value & SNOOP_USE_METADATA) snoop_flags |= SNOOP_USE_METADATA; + if (value & (SNOOP_SET_VL0TOVL15)) { + ppd = &dd->pport[0]; /* first port will do */ + ret = hfi1_assign_snoop_link_credits(ppd, value); + } break; default: return -ENOTTY; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 363e6ef..023c504 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1515,6 +1515,7 @@ int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); +int set_buffer_control(struct hfi1_devdata *dd, struct buffer_control *bc); static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd) { -- cgit v0.10.2 From e154f12716ffbbd7bab52b48b8e78142a22a59c0 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:37:24 -0800 Subject: staging/rdma/hfi1: Make EPROM check per device Add a variable eprom_available to each device, replacing the global of the same name. This is to allow multiple HFI devices with different EPROM availability to operate correctly on the the same system. Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c index 8104a11..29958aa 100644 --- a/drivers/staging/rdma/hfi1/eprom.c +++ b/drivers/staging/rdma/hfi1/eprom.c @@ -106,10 +106,8 @@ /* * Use the EP mutex to guard against other callers from within the driver. - * Also covers usage of eprom_available. */ static DEFINE_MUTEX(eprom_mutex); -static int eprom_available; /* default: not available */ /* * Turn on external enable line that allows writing on the flash. 
@@ -376,15 +374,13 @@ int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) return -EINVAL; } + /* some devices do not have an EPROM */ + if (!dd->eprom_available) + return -EOPNOTSUPP; + /* lock against other callers touching the ASIC block */ mutex_lock(&eprom_mutex); - /* some platforms do not have an EPROM */ - if (!eprom_available) { - ret = -ENOSYS; - goto done_asic; - } - /* lock against the other HFI on another OS */ ret = acquire_hw_mutex(dd); if (ret) { @@ -458,8 +454,6 @@ int eprom_init(struct hfi1_devdata *dd) /* lock against other callers */ mutex_lock(&eprom_mutex); - if (eprom_available) /* already initialized */ - goto done_asic; /* * Lock against the other HFI on another OS - the mutex above @@ -487,7 +481,7 @@ int eprom_init(struct hfi1_devdata *dd) /* wake the device with command "release powerdown NoID" */ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); - eprom_available = 1; + dd->eprom_available = true; release_hw_mutex(dd); done_asic: mutex_unlock(&eprom_mutex); diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 023c504..585485b 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1145,6 +1145,7 @@ struct hfi1_devdata { __le64 *rcvhdrtail_dummy_kvaddr; dma_addr_t rcvhdrtail_dummy_physaddr; + bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ bool aspm_enabled; /* ASPM state: enabled/disabled */ /* Serialize ASPM enable/disable between multiple verbs contexts */ -- cgit v0.10.2 From 7580fc31dfbfcacab2a3243342d4b5de4b494cbf Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 3 Feb 2016 14:37:32 -0800 Subject: staging/rdma/hfi1: Remove unused variable nsbr Remove unused nsbr count from PCIe Gen3 code Reviewed-by: Stuart Summers Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 3d0c2e2..5642d85 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -955,7 +955,6 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) uint default_pset; u16 target_vector, target_speed; u16 lnkctl2, vendor; - u8 nsbr = 1; u8 div; const u8 (*eq)[3]; int return_error = 0; @@ -989,12 +988,6 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) } /* - * A0 needs an additional SBR - */ - if (is_ax(dd)) - nsbr++; - - /* * Do the Gen3 transition. Steps are those of the PCIe Gen3 * recipe. */ -- cgit v0.10.2 From a402d6ab409e0e943150a803b94dee76c9de5c27 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 3 Feb 2016 14:37:41 -0800 Subject: staging/rdma/hfi1: Fix bug that could block the process on context exit A race was discovred in the user SDMA code, which could result in an process being stuck in the kernel call indefinitely in certain error conditions. If, during the processing of a user SDMA request, there was an error *and* all outstanding SDMA descriptor had been completed by the time the that error case was handled in the calling function, the state of the packet queue would not get correctly updated resulting in the process subsequently getting stuck, thinking that there are more descriptors to be completed. To handle this scenario, the driver now checks the submitted packet count vs. the completed. If all submitted packets have also been completed, the driver can safely free the request and signal user level. Otherwise, this will be handled by the completion callback. 
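In code terms the fix reduces to a single ordering check on the error path; the sketch below is illustrative only (the helper name is hypothetical, the fields are those of user_sdma_request as used in the hunks that follow):

/* Sketch: may the error path free the request, or does the completion
 * callback still own it?  Everything submitted so far has completed
 * once seqcomp has caught up with seqsubmitted - 1. */
static bool error_path_owns_cleanup(struct user_sdma_request *req)
{
	return ACCESS_ONCE(req->seqcomp) == req->seqsubmitted - 1;
}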
Reviewed-by: Dennis Dalessandro Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 55c7e6a..ac90309 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -678,7 +678,6 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, ret = user_sdma_send_pkts(req, pcount); if (unlikely(ret < 0 && ret != -EBUSY)) { req->status = ret; - atomic_dec(&pq->n_reqs); goto free_req; } @@ -703,6 +702,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, if (ret != -EBUSY) { req->status = ret; set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (ACCESS_ONCE(req->seqcomp) == + req->seqsubmitted - 1) + goto free_req; return ret; } wait_event_interruptible_timeout( @@ -717,6 +719,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, return 0; free_req: user_sdma_free_request(req, true); + pq_update(pq); set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); return ret; } -- cgit v0.10.2 From eb2e557c3663bb43a49f223b49e5101bbfc1d546 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 3 Feb 2016 14:37:50 -0800 Subject: staging/rdma/hfi1: Change for data type of port number This commit changes the data type for port_num in pma_get_opa_porterrors() from unsigned long to u8. Reviewed-by: Ira Weiny Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 3df1c8e..97bdcb7 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -2687,7 +2687,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, { size_t response_data_size; struct _port_ectrs *rsp; - unsigned long port_num; + u8 port_num; struct opa_port_error_counters64_msg *req; struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 num_ports; @@ -2728,7 +2728,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, port_num = find_first_bit((unsigned long *)&port_mask, sizeof(port_mask)); - if ((u8)port_num != port) { + if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } @@ -2739,7 +2739,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, ppd = ppd_from_ibp(ibp); memset(rsp, 0, sizeof(*rsp)); - rsp->port_number = (u8)port_num; + rsp->port_number = port_num; rsp->port_rcv_constraint_errors = cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR, @@ -2807,7 +2807,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u64 port_mask; u32 num_ports; - unsigned long port_num; + u8 port_num; u8 num_pslm; u64 reg; @@ -2840,7 +2840,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, port_num = find_first_bit((unsigned long *)&port_mask, sizeof(port_mask)); - if ((u8)port_num != port) { + if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } @@ -3048,7 +3048,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u64 port_mask; u32 num_ports; - unsigned long port_num; + u8 port_num; u8 num_pslm; u32 error_info_select; @@ -3073,7 +3073,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, port_num = find_first_bit((unsigned long *)&port_mask, sizeof(port_mask)); - if ((u8)port_num != port) { + if (port_num != port) { pmp->mad_hdr.status |= 
IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } -- cgit v0.10.2 From 5950e9b184ae47c6e4ec9cfb0dc698194d524f80 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 3 Feb 2016 14:37:59 -0800 Subject: staging/rdma/hfi1: Replacement of goto's for break/returns It replaces goto's for break and return statements in process_perf_opa(). Reviewed-by: Ira Weiny Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 97bdcb7..a315579 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -3828,7 +3828,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, if (smp->class_version != OPA_SMI_CLASS_VERSION) { smp->status |= IB_SMP_UNSUP_VERSION; ret = reply((struct ib_mad_hdr *)smp); - goto bail; + return ret; } ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey, smp->route.dr.dr_slid, smp->route.dr.return_path, @@ -3854,7 +3854,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, smp->route.dr.return_path, smp->hop_cnt); ret = IB_MAD_RESULT_FAILURE; - goto bail; + return ret; } *resp_len = opa_get_smp_header_size(smp); @@ -3866,23 +3866,25 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, clear_opa_smp_data(smp); ret = subn_get_opa_sma(attr_id, smp, am, data, ibdev, port, resp_len); - goto bail; + break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_get_opa_aggregate(smp, ibdev, port, resp_len); - goto bail; + break; } + break; case IB_MGMT_METHOD_SET: switch (attr_id) { default: ret = subn_set_opa_sma(attr_id, smp, am, data, ibdev, port, resp_len); - goto bail; + break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_set_opa_aggregate(smp, ibdev, port, resp_len); - goto bail; + break; } + break; case IB_MGMT_METHOD_TRAP: case IB_MGMT_METHOD_REPORT: case IB_MGMT_METHOD_REPORT_RESP: @@ -3893,13 +3895,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, * Just tell the caller to process it normally. 
*/ ret = IB_MAD_RESULT_SUCCESS; - goto bail; + break; default: smp->status |= IB_SMP_UNSUP_METHOD; ret = reply((struct ib_mad_hdr *)smp); + break; } -bail: return ret; } @@ -3915,7 +3917,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, if (smp->class_version != 1) { smp->status |= IB_SMP_UNSUP_VERSION; ret = reply((struct ib_mad_hdr *)smp); - goto bail; + return ret; } ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, @@ -3942,7 +3944,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, (__force __be32)smp->dr_slid, smp->return_path, smp->hop_cnt); ret = IB_MAD_RESULT_FAILURE; - goto bail; + return ret; } switch (smp->method) { @@ -3950,15 +3952,15 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, switch (smp->attr_id) { case IB_SMP_ATTR_NODE_INFO: ret = subn_get_nodeinfo(smp, ibdev, port); - goto bail; + break; default: smp->status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)smp); - goto bail; + break; } + break; } -bail: return ret; } @@ -3983,44 +3985,46 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port, switch (pmp->mad_hdr.attr_id) { case IB_PMA_CLASS_PORT_INFO: ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len); - goto bail; + break; case OPA_PM_ATTRIB_ID_PORT_STATUS: ret = pma_get_opa_portstatus(pmp, ibdev, port, resp_len); - goto bail; + break; case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS: ret = pma_get_opa_datacounters(pmp, ibdev, port, resp_len); - goto bail; + break; case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS: ret = pma_get_opa_porterrors(pmp, ibdev, port, resp_len); - goto bail; + break; case OPA_PM_ATTRIB_ID_ERROR_INFO: ret = pma_get_opa_errorinfo(pmp, ibdev, port, resp_len); - goto bail; + break; default: pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)pmp); - goto bail; + break; } + break; case IB_MGMT_METHOD_SET: switch (pmp->mad_hdr.attr_id) { case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS: ret = pma_set_opa_portstatus(pmp, ibdev, port, resp_len); - goto bail; + break; case OPA_PM_ATTRIB_ID_ERROR_INFO: ret = pma_set_opa_errorinfo(pmp, ibdev, port, resp_len); - goto bail; + break; default: pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)pmp); - goto bail; + break; } + break; case IB_MGMT_METHOD_TRAP: case IB_MGMT_METHOD_GET_RESP: @@ -4030,14 +4034,14 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port, * Just tell the caller to process it normally. */ ret = IB_MAD_RESULT_SUCCESS; - goto bail; + break; default: pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; ret = reply((struct ib_mad_hdr *)pmp); + break; } -bail: return ret; } @@ -4102,12 +4106,12 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); - goto bail; + break; default: ret = IB_MAD_RESULT_SUCCESS; + break; } -bail: return ret; } -- cgit v0.10.2 From b8d114ebb6fb6dfb61a6f7bd5b2bef529015b0f0 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 3 Feb 2016 14:38:07 -0800 Subject: staging/rdma/hfi1: Adding support for hfi counters via sysfs It enables access to counters in /sys/class/infiniband/hfi1_0/ports/1/counters by providing infrastructure when PMA queries occur. Counters symbol_error and VL15_dropped are not supported in OPA, therefore, 0 will always be returned. 
In addition, two common routines (pma_get_opa_port_dctrs, pma_get_opa_port_ectrs) were created to query counters to avoid code duplication. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index a315579..2fcc9f3 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -2524,6 +2524,27 @@ static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp, } } +static void pma_get_opa_port_dctrs(struct ib_device *ibdev, + struct _port_dctrs *rsp) +{ + struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + + rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS, + CNTR_INVALID_VL)); + rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS, + CNTR_INVALID_VL)); + rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS, + CNTR_INVALID_VL)); + rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS, + CNTR_INVALID_VL)); + rsp->port_multicast_xmit_pkts = + cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS, + CNTR_INVALID_VL)); + rsp->port_multicast_rcv_pkts = + cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS, + CNTR_INVALID_VL)); +} + static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, struct ib_device *ibdev, u8 port, u32 *resp_len) { @@ -2592,34 +2613,14 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, */ hfi1_read_link_quality(dd, &lq); rsp->link_quality_indicator = cpu_to_be32((u32)lq); + pma_get_opa_port_dctrs(ibdev, rsp); - /* rsp->sw_port_congestion is 0 for HFIs */ - /* rsp->port_xmit_time_cong is 0 for HFIs */ - /* rsp->port_xmit_wasted_bw ??? */ - /* rsp->port_xmit_wait_data ??? */ - /* rsp->port_mark_fecn is 0 for HFIs */ - - rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS, - CNTR_INVALID_VL)); - rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS, - CNTR_INVALID_VL)); - rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS, - CNTR_INVALID_VL)); - rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS, - CNTR_INVALID_VL)); - rsp->port_multicast_xmit_pkts = - cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS, - CNTR_INVALID_VL)); - rsp->port_multicast_rcv_pkts = - cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS, - CNTR_INVALID_VL)); rsp->port_xmit_wait = cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL)); rsp->port_rcv_fecn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL)); rsp->port_rcv_becn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL)); - rsp->port_error_counter_summary = cpu_to_be64(get_error_counter_summary(ibdev, port, res_lli, res_ler)); @@ -2682,6 +2683,81 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } +static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *) + pmp->data; + struct _port_dctrs rsp; + + if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + goto bail; + } + + memset(&rsp, 0, sizeof(rsp)); + pma_get_opa_port_dctrs(ibdev, &rsp); + + p->port_xmit_data = rsp.port_xmit_data; + p->port_rcv_data = rsp.port_rcv_data; + p->port_xmit_packets = rsp.port_xmit_pkts; + p->port_rcv_packets = rsp.port_rcv_pkts; + p->port_unicast_xmit_packets = 0; + p->port_unicast_rcv_packets = 0; + p->port_multicast_xmit_packets = 
rsp.port_multicast_xmit_pkts; + p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts; + +bail: + return reply((struct ib_mad_hdr *)pmp); +} + +static void pma_get_opa_port_ectrs(struct ib_device *ibdev, + struct _port_ectrs *rsp, u8 port) +{ + u64 tmp, tmp2; + struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + struct hfi1_ibport *ibp = to_iport(ibdev, port); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); + tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, + CNTR_INVALID_VL); + if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) { + /* overflow/wrapped */ + rsp->link_error_recovery = cpu_to_be32(~0); + } else { + rsp->link_error_recovery = cpu_to_be32(tmp2); + } + + rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN, + CNTR_INVALID_VL)); + rsp->port_rcv_errors = + cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL)); + rsp->port_rcv_remote_physical_errors = + cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR, + CNTR_INVALID_VL)); + rsp->port_rcv_switch_relay_errors = 0; + rsp->port_xmit_discards = + cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD, + CNTR_INVALID_VL)); + rsp->port_xmit_constraint_errors = + cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, + CNTR_INVALID_VL)); + rsp->port_rcv_constraint_errors = + cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR, + CNTR_INVALID_VL)); + tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); + tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); + if (tmp2 < tmp) { + /* overflow/wrapped */ + rsp->local_link_integrity_errors = cpu_to_be64(~0); + } else { + rsp->local_link_integrity_errors = cpu_to_be64(tmp2); + } + rsp->excessive_buffer_overruns = + cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL)); +} + static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, struct ib_device *ibdev, u8 port, u32 *resp_len) { @@ -2697,7 +2773,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, struct hfi1_pportdata *ppd; struct _vls_ectrs *vlinfo; unsigned long vl; - u64 port_mask, tmp, tmp2; + u64 port_mask, tmp; u32 vl_select_mask; int vfi; @@ -2741,44 +2817,16 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, memset(rsp, 0, sizeof(*rsp)); rsp->port_number = port_num; - rsp->port_rcv_constraint_errors = - cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR, - CNTR_INVALID_VL)); - /* port_rcv_switch_relay_errors is 0 for HFIs */ - rsp->port_xmit_discards = - cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD, - CNTR_INVALID_VL)); + pma_get_opa_port_ectrs(ibdev, rsp, port_num); + rsp->port_rcv_remote_physical_errors = cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR, - CNTR_INVALID_VL)); - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); - tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); - if (tmp2 < tmp) { - /* overflow/wrapped */ - rsp->local_link_integrity_errors = cpu_to_be64(~0); - } else { - rsp->local_link_integrity_errors = cpu_to_be64(tmp2); - } - tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); - tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, - CNTR_INVALID_VL); - if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) { - /* overflow/wrapped */ - rsp->link_error_recovery = cpu_to_be32(~0); - } else { - rsp->link_error_recovery = cpu_to_be32(tmp2); - } - rsp->port_xmit_constraint_errors = - cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, - CNTR_INVALID_VL)); - rsp->excessive_buffer_overruns = - cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL)); + CNTR_INVALID_VL)); 
rsp->fm_config_errors = cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL)); - rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN, - CNTR_INVALID_VL)); tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL); + rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff; vlinfo = (struct _vls_ectrs *)&(rsp->vls[0]); @@ -2798,6 +2846,91 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } +static int pma_get_ib_portcounters(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct _port_ectrs rsp; + u64 temp_link_overrun_errors; + u64 temp_64; + u32 temp_32; + + memset(&rsp, 0, sizeof(rsp)); + pma_get_opa_port_ectrs(ibdev, &rsp, port); + + if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + goto bail; + } + + p->symbol_error_counter = 0; /* N/A for OPA */ + + temp_32 = be32_to_cpu(rsp.link_error_recovery); + if (temp_32 > 0xFFUL) + p->link_error_recovery_counter = 0xFF; + else + p->link_error_recovery_counter = (u8)temp_32; + + temp_32 = be32_to_cpu(rsp.link_downed); + if (temp_32 > 0xFFUL) + p->link_downed_counter = 0xFF; + else + p->link_downed_counter = (u8)temp_32; + + temp_64 = be64_to_cpu(rsp.port_rcv_errors); + if (temp_64 > 0xFFFFUL) + p->port_rcv_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_errors = cpu_to_be16((u16)temp_64); + + temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors); + if (temp_64 > 0xFFFFUL) + p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64); + + temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors); + p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64); + + temp_64 = be64_to_cpu(rsp.port_xmit_discards); + if (temp_64 > 0xFFFFUL) + p->port_xmit_discards = cpu_to_be16(0xFFFF); + else + p->port_xmit_discards = cpu_to_be16((u16)temp_64); + + temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors); + if (temp_64 > 0xFFUL) + p->port_xmit_constraint_errors = 0xFF; + else + p->port_xmit_constraint_errors = (u8)temp_64; + + temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors); + if (temp_64 > 0xFFUL) + p->port_rcv_constraint_errors = 0xFFUL; + else + p->port_rcv_constraint_errors = (u8)temp_64; + + /* LocalLink: 7:4, BufferOverrun: 3:0 */ + temp_64 = be64_to_cpu(rsp.local_link_integrity_errors); + if (temp_64 > 0xFUL) + temp_64 = 0xFUL; + + temp_link_overrun_errors = temp_64 << 4; + + temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns); + if (temp_64 > 0xFUL) + temp_64 = 0xFUL; + temp_link_overrun_errors |= temp_64; + + p->link_overrun_errors = (u8)temp_link_overrun_errors; + + p->vl15_dropped = 0; /* N/A for OPA */ + +bail: + return reply((struct ib_mad_hdr *)pmp); +} + static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, struct ib_device *ibdev, u8 port, u32 *resp_len) { @@ -3964,6 +4097,68 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, return ret; } +static int process_perf(struct ib_device *ibdev, u8 port, + const struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; + struct ib_class_port_info *cpi = (struct ib_class_port_info *) + &pmp->data; + int ret = IB_MAD_RESULT_FAILURE; + + *out_mad = *in_mad; + if (pmp->mad_hdr.class_version != 1) { + pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_mad_hdr *)pmp); + return ret; + } + + switch (pmp->mad_hdr.method) 
{ + case IB_MGMT_METHOD_GET: + switch (pmp->mad_hdr.attr_id) { + case IB_PMA_PORT_COUNTERS: + ret = pma_get_ib_portcounters(pmp, ibdev, port); + break; + case IB_PMA_PORT_COUNTERS_EXT: + ret = pma_get_ib_portcounters_ext(pmp, ibdev, port); + break; + case IB_PMA_CLASS_PORT_INFO: + cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + ret = reply((struct ib_mad_hdr *)pmp); + break; + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_mad_hdr *)pmp); + break; + } + break; + + case IB_MGMT_METHOD_SET: + if (pmp->mad_hdr.attr_id) { + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_mad_hdr *)pmp); + } + break; + + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. + */ + ret = IB_MAD_RESULT_SUCCESS; + break; + + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_mad_hdr *)pmp); + break; + } + + return ret; +} + static int process_perf_opa(struct ib_device *ibdev, u8 port, const struct opa_mad *in_mad, struct opa_mad *out_mad, u32 *resp_len) @@ -4107,6 +4302,9 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, case IB_MGMT_CLASS_SUBN_LID_ROUTED: ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); break; + case IB_MGMT_CLASS_PERF_MGMT: + ret = process_perf(ibdev, port, in_mad, out_mad); + break; default: ret = IB_MAD_RESULT_SUCCESS; break; -- cgit v0.10.2 From cd93a9e8c5a58e451c834e48f1278383fbfa1072 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 3 Feb 2016 14:38:16 -0800 Subject: staging/rdma/hfi1: Removing unused struct hfi1_verbs_counters It removes the unused struct hfi1_verbs_counters from verbs.h Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index a157e64..335e3a8 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -279,22 +279,6 @@ struct hfi1_ibdev { #endif }; -struct hfi1_verbs_counters { - u64 symbol_error_counter; - u64 link_error_recovery_counter; - u64 link_downed_counter; - u64 port_rcv_errors; - u64 port_rcv_remphys_errors; - u64 port_xmit_discards; - u64 port_xmit_data; - u64 port_rcv_data; - u64 port_xmit_packets; - u64 port_rcv_packets; - u32 local_link_integrity_errors; - u32 excessive_buffer_overrun_errors; - u32 vl15_dropped; -}; - static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev) { struct rvt_dev_info *rdi; -- cgit v0.10.2 From 9171bfdd363304713a5a82ae03da6ec55a0cae39 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 10:59:01 -0800 Subject: staging/rdma/hfi1: centralize timer routines into rc Centralize disparate timer maintenance. This allow for central control and changes to the RC timer handling including future optimizations. 
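The practical effect on call sites is that open-coded timer arming collapses into a single helper call; a simplified sketch (not copied verbatim from the driver):

	/* before: each caller poked qp->s_timer directly */
	qp->s_flags |= RVT_S_TIMER;
	qp->s_timer.function = rc_timeout;
	qp->s_timer.expires = jiffies + qp->timeout_jiffies;	/* 4.096 usec * (1 << qp->timeout) */
	add_timer(&qp->s_timer);

	/* after: one central helper per operation */
	hfi1_add_retry_timer(qp);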
Reviewed-by: Jubin John Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 371edc3..350faaa 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -62,6 +62,113 @@ static void rc_timeout(unsigned long arg); +/** + * hfi1_add_retry_timer - add/start a retry timer + * @qp - the QP + * + * add a retry timer on the QP + */ +static inline void hfi1_add_retry_timer(struct rvt_qp *qp) +{ + qp->s_flags |= RVT_S_TIMER; + qp->s_timer.function = rc_timeout; + /* 4.096 usec. * (1 << qp->timeout) */ + qp->s_timer.expires = jiffies + qp->timeout_jiffies; + add_timer(&qp->s_timer); +} + +/** + * hfi1_add_rnr_timer - add/start an rnr timer + * @qp - the QP + * @to - timeout in usecs + * + * add an rnr timer on the QP + */ +static inline void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) +{ + qp->s_flags |= RVT_S_WAIT_RNR; + qp->s_timer.function = hfi1_rc_rnr_retry; + qp->s_timer.expires = jiffies + usecs_to_jiffies(to); + add_timer(&qp->s_timer); +} + +/** + * hfi1_mod_retry_timer - mod a retry timer + * @qp - the QP + * + * Modify a potentially already running retry + * timer + */ +static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) +{ + qp->s_flags |= RVT_S_TIMER; + qp->s_timer.function = rc_timeout; + /* 4.096 usec. * (1 << qp->timeout) */ + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); +} + +/** + * hfi1_stop_retry_timer - stop a retry timer + * @qp - the QP + * + * stop a retry timer and return if the timer + * had been pending. + */ +static inline int hfi1_stop_retry_timer(struct rvt_qp *qp) +{ + int rval = 0; + + /* Remove QP from retry */ + if (qp->s_flags & RVT_S_TIMER) { + qp->s_flags &= ~RVT_S_TIMER; + rval = del_timer(&qp->s_timer); + } + return rval; +} + +/** + * hfi1_stop_rc_timers - stop all timers + * @qp - the QP + * + * stop any pending timers + */ +static inline void hfi1_stop_rc_timers(struct rvt_qp *qp) +{ + /* Remove QP from all timers */ + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); + del_timer(&qp->s_timer); + } +} + +/** + * hfi1_stop_rnr_timer - stop an rnr timer + * @qp - the QP + * + * stop an rnr timer and return if the timer + * had been pending. + */ +static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) +{ + int rval = 0; + + /* Remove QP from rnr timer */ + if (qp->s_flags & RVT_S_WAIT_RNR) { + qp->s_flags &= ~RVT_S_WAIT_RNR; + rval = del_timer(&qp->s_timer); + } + return rval; +} + +/** + * hfi1_del_timers_sync - wait for any timeout routines to exit + * @qp - the QP + */ +static inline void hfi1_del_timers_sync(struct rvt_qp *qp) +{ + del_timer_sync(&qp->s_timer); +} + static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { -- cgit v0.10.2 From e6f8c2b31f107f39e7301a02b5a6808d79c9f1f0 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 10:59:09 -0800 Subject: staging/rdma/hfi1: use new timer routines Use the new timer routines. 
Reviewed-by: Jubin John Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 350faaa..5c32182 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -183,15 +183,6 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, return wqe->length - len; } -static void start_timer(struct rvt_qp *qp) -{ - qp->s_flags |= RVT_S_TIMER; - qp->s_timer.function = rc_timeout; - /* 4.096 usec. * (1 << qp->timeout) */ - qp->s_timer.expires = jiffies + qp->timeout_jiffies; - add_timer(&qp->s_timer); -} - /** * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read) * @dev: the device for this QP @@ -1054,11 +1045,8 @@ void hfi1_rc_rnr_retry(unsigned long arg) unsigned long flags; spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_flags & RVT_S_WAIT_RNR) { - qp->s_flags &= ~RVT_S_WAIT_RNR; - del_timer(&qp->s_timer); - hfi1_schedule_send(qp); - } + hfi1_stop_rnr_timer(qp); + hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -1128,7 +1116,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) && (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) - start_timer(qp); + hfi1_add_retry_timer(qp); while (qp->s_last != qp->s_acked) { wqe = rvt_get_swqe_ptr(qp, qp->s_last); @@ -1276,12 +1264,10 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, int ret = 0; u32 ack_psn; int diff; + unsigned long to; /* Remove QP from retry timer */ - if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); - } + hfi1_stop_rc_timers(qp); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write @@ -1378,7 +1364,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, * We are expecting more ACKs so * reset the re-transmit timer. */ - start_timer(qp); + hfi1_add_retry_timer(qp); /* * We can stop re-sending the earlier packets and * continue with the next packet the receiver wants. @@ -1421,12 +1407,10 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, reset_psn(qp, psn); qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK); - qp->s_flags |= RVT_S_WAIT_RNR; - qp->s_timer.function = hfi1_rc_rnr_retry; - qp->s_timer.expires = jiffies + usecs_to_jiffies( + to = ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) & - HFI1_AETH_CREDIT_MASK]); - add_timer(&qp->s_timer); + HFI1_AETH_CREDIT_MASK]; + hfi1_add_rnr_timer(qp, to); goto bail; case 3: /* NAK */ @@ -1496,10 +1480,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, struct rvt_swqe *wqe; /* Remove QP from retry timer */ - if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); - } + hfi1_stop_rc_timers(qp); wqe = rvt_get_swqe_ptr(qp, qp->s_acked); -- cgit v0.10.2 From 633d27399514e7726633c9029e3947f0526d2565 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 10:59:18 -0800 Subject: staging/rdma/hfi1: use mod_timer when appropriate Use new timer API to optimize maintenance of timers during ACK processing. When we are still expecting ACKs, mod the timer to avoid a heavyweight delete/add. Otherwise, insure do_rc_ack() maintains the timer as it had. 
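The saving comes from the timer API itself: mod_timer() re-arms a timer that may already be pending in one step, whereas the old path always paid for a delete followed by an add. An illustrative sketch using the same fields as the hunks below:

	/* heavyweight: tear down, then re-arm on every ACK */
	if (qp->s_flags & RVT_S_TIMER)
		del_timer(&qp->s_timer);
	qp->s_flags |= RVT_S_TIMER;
	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
	add_timer(&qp->s_timer);

	/* lightweight: mod_timer() updates the expiry whether or not the timer is pending */
	qp->s_flags |= RVT_S_TIMER;
	mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);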
Reviewed-by: Jubin John Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 5c32182..700d849 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -1266,9 +1266,6 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, int diff; unsigned long to; - /* Remove QP from retry timer */ - hfi1_stop_rc_timers(qp); - /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -1296,7 +1293,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, opcode == OP(RDMA_READ_RESPONSE_ONLY) && diff == 0) { ret = 1; - goto bail; + goto bail_stop; } /* * If this request is a RDMA read or atomic, and the ACK is @@ -1327,7 +1324,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, * No need to process the ACK/NAK since we are * restarting an earlier request. */ - goto bail; + goto bail_stop; } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { @@ -1362,18 +1359,22 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, if (qp->s_acked != qp->s_tail) { /* * We are expecting more ACKs so - * reset the re-transmit timer. + * mod the retry timer. */ - hfi1_add_retry_timer(qp); + hfi1_mod_retry_timer(qp); /* * We can stop re-sending the earlier packets and * continue with the next packet the receiver wants. */ if (cmp_psn(qp->s_psn, psn) <= 0) reset_psn(qp, psn + 1); - } else if (cmp_psn(qp->s_psn, psn) <= 0) { - qp->s_state = OP(SEND_LAST); - qp->s_psn = psn + 1; + } else { + /* No more acks - kill all timers */ + hfi1_stop_rc_timers(qp); + if (cmp_psn(qp->s_psn, psn) <= 0) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = psn + 1; + } } if (qp->s_flags & RVT_S_WAIT_ACK) { qp->s_flags &= ~RVT_S_WAIT_ACK; @@ -1383,15 +1384,14 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, qp->s_rnr_retry = qp->s_rnr_retry_cnt; qp->s_retry = qp->s_retry_cnt; update_last_psn(qp, psn); - ret = 1; - goto bail; + return 1; case 1: /* RNR NAK */ ibp->rvp.n_rnr_naks++; if (qp->s_acked == qp->s_tail) - goto bail; + goto bail_stop; if (qp->s_flags & RVT_S_WAIT_RNR) - goto bail; + goto bail_stop; if (qp->s_rnr_retry == 0) { status = IB_WC_RNR_RETRY_EXC_ERR; goto class_b; @@ -1407,15 +1407,16 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, reset_psn(qp, psn); qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK); + hfi1_stop_rc_timers(qp); to = ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK]; hfi1_add_rnr_timer(qp, to); - goto bail; + return 0; case 3: /* NAK */ if (qp->s_acked == qp->s_tail) - goto bail; + goto bail_stop; /* The last valid PSN is the previous PSN. */ update_last_psn(qp, psn - 1); switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) & @@ -1458,15 +1459,16 @@ class_b: } qp->s_retry = qp->s_retry_cnt; qp->s_rnr_retry = qp->s_rnr_retry_cnt; - goto bail; + goto bail_stop; default: /* 2: reserved */ reserved: /* Ignore reserved NAK codes. */ - goto bail; + goto bail_stop; } - -bail: + return ret; +bail_stop: + hfi1_stop_rc_timers(qp); return ret; } -- cgit v0.10.2 From 3c9d149bdc797a7dfb48bcf327f9ceeb6060a512 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 10:59:27 -0800 Subject: staging/rdma/hfi1: add unique rnr timer Add a new rnr timer to hfi1. 
This allows for future optimizations having the retry and rnr timers separate. Reviewed-by: Jubin John Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index b78c8ea..a5f0e2e 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -607,7 +607,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, kfree(priv); return ERR_PTR(-ENOMEM); } - + setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp); return priv; } diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 700d849..1ff19aa 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -164,7 +164,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) * hfi1_del_timers_sync - wait for any timeout routines to exit * @qp - the QP */ -static inline void hfi1_del_timers_sync(struct rvt_qp *qp) +void hfi1_del_timers_sync(struct rvt_qp *qp) { del_timer_sync(&qp->s_timer); } diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 335e3a8..6294fa8 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -210,6 +210,7 @@ struct hfi1_qp_priv { u8 s_sc; /* SC[0..4] for next packet */ u8 r_adefered; /* number of acks defered */ struct iowait s_iowait; + struct timer_list s_rnr_timer; struct rvt_qp *owner; }; @@ -403,6 +404,7 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); void hfi1_rc_rnr_retry(unsigned long arg); +void hfi1_del_timers_sync(struct rvt_qp *qp); void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr); -- cgit v0.10.2 From 08279d5c9424afd710c90d0b6df95612d2bb5a3f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 10:59:36 -0800 Subject: staging/rdma/hfi1: use new RNR timer Use the new RNR timer for hfi1. For qib, this timer doesn't exist, so exploit driver callbacks to use the new timer as appropriate. 
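The ownership shift described here amounts to the following condensed sketch of the hunks below (not a verbatim quote): rdmavt stops deleting the timer itself, and each driver's stop_send_queue() callback quiesces whatever timers it owns.

	/* rdmavt core: only ask the driver to stop its send machinery */
	rdi->driver_f.stop_send_queue(qp);	/* del_timer_sync(&qp->s_timer) removed here */

	/* hfi1 callback: quiesce the work item plus both retry and RNR timers */
	cancel_work_sync(&priv->s_iowait.iowork);
	hfi1_del_timers_sync(qp);		/* del_timer_sync() on s_timer and priv->s_rnr_timer */

	/* qib callback: keeps the single retry timer and now stops it itself */
	cancel_work_sync(&priv->s_work);
	del_timer_sync(&qp->s_timer);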
Reviewed-by: Jubin John Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 45bed5f..787116f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -412,6 +412,7 @@ void stop_send_queue(struct rvt_qp *qp) struct qib_qp_priv *priv = qp->priv; cancel_work_sync(&priv->s_work); + del_timer_sync(&qp->s_timer); } void quiesce_qp(struct rvt_qp *qp) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 322de64..439213c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -405,7 +405,6 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /* Stop the send queue and the retry timer */ rdi->driver_f.stop_send_queue(qp); - del_timer_sync(&qp->s_timer); /* Wait for things to stop */ rdi->driver_f.quiesce_qp(qp); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index a5f0e2e..b96d5ee 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -608,6 +608,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, return ERR_PTR(-ENOMEM); } setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp); + qp->s_timer.function = hfi1_rc_timeout; return priv; } @@ -647,6 +648,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) void flush_qp_waiters(struct rvt_qp *qp) { flush_iowait(qp); + hfi1_stop_rc_timers(qp); } void stop_send_queue(struct rvt_qp *qp) @@ -654,6 +656,7 @@ void stop_send_queue(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; cancel_work_sync(&priv->s_iowait.iowork); + hfi1_del_timers_sync(qp); } void quiesce_qp(struct rvt_qp *qp) diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 1ff19aa..2c46491 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -60,8 +60,6 @@ /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_RC_##x -static void rc_timeout(unsigned long arg); - /** * hfi1_add_retry_timer - add/start a retry timer * @qp - the QP @@ -71,7 +69,6 @@ static void rc_timeout(unsigned long arg); static inline void hfi1_add_retry_timer(struct rvt_qp *qp) { qp->s_flags |= RVT_S_TIMER; - qp->s_timer.function = rc_timeout; /* 4.096 usec. * (1 << qp->timeout) */ qp->s_timer.expires = jiffies + qp->timeout_jiffies; add_timer(&qp->s_timer); @@ -86,10 +83,11 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) */ static inline void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) { + struct hfi1_qp_priv *priv = qp->priv; + qp->s_flags |= RVT_S_WAIT_RNR; - qp->s_timer.function = hfi1_rc_rnr_retry; qp->s_timer.expires = jiffies + usecs_to_jiffies(to); - add_timer(&qp->s_timer); + add_timer(&priv->s_rnr_timer); } /** @@ -102,7 +100,6 @@ static inline void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) { qp->s_flags |= RVT_S_TIMER; - qp->s_timer.function = rc_timeout; /* 4.096 usec. 
* (1 << qp->timeout) */ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); } @@ -132,12 +129,15 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp) * * stop any pending timers */ -static inline void hfi1_stop_rc_timers(struct rvt_qp *qp) +void hfi1_stop_rc_timers(struct rvt_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; + /* Remove QP from all timers */ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); + del_timer(&priv->s_rnr_timer); } } @@ -151,11 +151,12 @@ static inline void hfi1_stop_rc_timers(struct rvt_qp *qp) static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) { int rval = 0; + struct hfi1_qp_priv *priv = qp->priv; /* Remove QP from rnr timer */ if (qp->s_flags & RVT_S_WAIT_RNR) { qp->s_flags &= ~RVT_S_WAIT_RNR; - rval = del_timer(&qp->s_timer); + rval = del_timer(&priv->s_rnr_timer); } return rval; } @@ -166,7 +167,10 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) */ void hfi1_del_timers_sync(struct rvt_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; + del_timer_sync(&qp->s_timer); + del_timer_sync(&priv->s_rnr_timer); } static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, @@ -1015,7 +1019,7 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) /* * This is called from s_timer for missing responses. */ -static void rc_timeout(unsigned long arg) +void hfi1_rc_timeout(unsigned long arg) { struct rvt_qp *qp = (struct rvt_qp *)arg; struct hfi1_ibport *ibp; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 6294fa8..26eda8a 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -404,7 +404,9 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); void hfi1_rc_rnr_retry(unsigned long arg); +void hfi1_rc_timeout(unsigned long arg); void hfi1_del_timers_sync(struct rvt_qp *qp); +void hfi1_stop_rc_timers(struct rvt_qp *qp); void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr); -- cgit v0.10.2 From 0940e0f68e59fca500cbad6f5f58bddefdb6dc53 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 11:03:02 -0800 Subject: staging/rdma/hfi1: remove duplicate timeout print The qp->timeout field is duplicated in the seqfile print. Remove it. Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index b96d5ee..9901ef0 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -547,7 +547,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) sde = qp_to_sdma_engine(qp, priv->s_sc); wqe = rvt_get_swqe_ptr(qp, qp->s_last); seq_printf(s, - "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n", + "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u SDE %p,%u\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -572,7 +572,6 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->remote_ah_attr.sl, qp->pmtu, qp->s_retry_cnt, - qp->timeout, qp->s_rnr_retry_cnt, sde, sde ? 
sde->this_idx : 0); -- cgit v0.10.2 From 20658661bc0712c51ad9798914f5eb3e28cb8121 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 11:03:11 -0800 Subject: staging/rdma/hfi1: add s_retry to diagnostics This is needed to debug ULP issues with getting retry attributes correctly specified. Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 9901ef0..ec9ee72 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -547,7 +547,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) sde = qp_to_sdma_engine(qp, priv->s_sc); wqe = rvt_get_swqe_ptr(qp, qp->s_last); seq_printf(s, - "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u SDE %p,%u\n", + "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u %u SDE %p,%u\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -571,6 +571,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->remote_ah_attr.dlid, qp->remote_ah_attr.sl, qp->pmtu, + qp->s_retry, qp->s_retry_cnt, qp->s_rnr_retry_cnt, sde, -- cgit v0.10.2 From 6c2ab0b857d1b674c5f710d2cbf06a0f3ac52313 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 11:03:19 -0800 Subject: staging/rdma/hfi1: Insure last cursor is updated prior to complete This patch is a prerequisite for adding a separate lock for post send. The timing of updating s_last needs to be before returning any send completion to avoid a race between a poll cq seeing a completion and the post send checking for a full queue. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 2c46491..e54e0b4 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -1123,10 +1123,18 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) hfi1_add_retry_timer(qp); while (qp->s_last != qp->s_acked) { + u32 s_last; + wqe = rvt_get_swqe_ptr(qp, qp->s_last); if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; + s_last = qp->s_last; + if (++s_last >= qp->s_size) + s_last = 0; + qp->s_last = s_last; + /* see post_send() */ + barrier(); for (i = 0; i < wqe->wr.num_sge; i++) { struct rvt_sge *sge = &wqe->sg_list[i]; @@ -1143,8 +1151,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) wc.qp = &qp->ibqp; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); } - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; } /* * If we were waiting for sends to complete before re-sending, @@ -1184,11 +1190,19 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, */ if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 || cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { + u32 s_last; + for (i = 0; i < wqe->wr.num_sge; i++) { struct rvt_sge *sge = &wqe->sg_list[i]; rvt_put_mr(sge->mr); } + s_last = qp->s_last; + if (++s_last >= qp->s_size) + s_last = 0; + qp->s_last = s_last; + /* see post_send() */ + barrier(); /* Post a send completion queue entry if requested. 
*/ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { @@ -1200,8 +1214,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, wc.qp = &qp->ibqp; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); } - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 6aeea6c..66449ac 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -921,6 +921,13 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; + last = qp->s_last; + old_last = last; + if (++last >= qp->s_size) + last = 0; + qp->s_last = last; + /* See post_send() */ + barrier(); for (i = 0; i < wqe->wr.num_sge; i++) { struct rvt_sge *sge = &wqe->sg_list[i]; @@ -948,11 +955,6 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, status != IB_WC_SUCCESS); } - last = qp->s_last; - old_last = last; - if (++last >= qp->s_size) - last = 0; - qp->s_last = last; if (qp->s_acked == old_last) qp->s_acked = last; if (qp->s_cur == old_last) -- cgit v0.10.2 From ee84541ad11e70d372670160e727680051801517 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 11:03:28 -0800 Subject: IB/qib: Insure last cursor is updated prior to complete This patch is a prerequisite for adding a separate lock for post send. The timing of updating s_last needs to be before returning any send completion to avoid a race between a poll cq seeing a completion and the post send checking for a full queue. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 044525d9..ce886b2 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1008,10 +1008,18 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) start_timer(qp); while (qp->s_last != qp->s_acked) { + u32 s_last; + wqe = rvt_get_swqe_ptr(qp, qp->s_last); if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 && qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; + s_last = qp->s_last; + if (++s_last >= qp->s_size) + s_last = 0; + qp->s_last = s_last; + /* see post_send() */ + barrier(); for (i = 0; i < wqe->wr.num_sge; i++) { struct rvt_sge *sge = &wqe->sg_list[i]; @@ -1028,8 +1036,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) wc.qp = &qp->ibqp; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); } - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; } /* * If we were waiting for sends to complete before resending, @@ -1068,11 +1074,19 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, */ if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 || qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { + u32 s_last; + for (i = 0; i < wqe->wr.num_sge; i++) { struct rvt_sge *sge = &wqe->sg_list[i]; rvt_put_mr(sge->mr); } + s_last = qp->s_last; + if (++s_last >= qp->s_size) + s_last = 0; + qp->s_last = s_last; + /* see post_send() */ + barrier(); /* Post a send completion queue entry if requested. 
*/ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { @@ -1084,8 +1098,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, wc.qp = &qp->ibqp; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); } - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; } else this_cpu_inc(*ibp->rvp.rc_delayed_comp); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 56668cb..2623684 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -795,6 +795,13 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; + last = qp->s_last; + old_last = last; + if (++last >= qp->s_size) + last = 0; + qp->s_last = last; + /* See post_send() */ + barrier(); for (i = 0; i < wqe->wr.num_sge; i++) { struct rvt_sge *sge = &wqe->sg_list[i]; @@ -822,11 +829,6 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, status != IB_WC_SUCCESS); } - last = qp->s_last; - old_last = last; - if (++last >= qp->s_size) - last = 0; - qp->s_last = last; if (qp->s_acked == old_last) qp->s_acked = last; if (qp->s_cur == old_last) -- cgit v0.10.2 From 066fad289408e7d48ce00b54a292e7a90602cb30 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 11:03:36 -0800 Subject: IB/rdmavt: remove unused qp field The field is a vestige from ipath. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 91f20fd..aed13e1 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -251,7 +251,6 @@ struct rvt_qp { u32 pmtu; /* decoded from path_mtu */ u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 s_ahgpsn; /* set to the psn in the copy of the header */ u8 state; /* QP state */ -- cgit v0.10.2 From 045277cf1548ab04a114bf560a01e38cf33b91b4 Mon Sep 17 00:00:00 2001 From: Hari Prasath Gujulan Elango Date: Thu, 4 Feb 2016 11:03:45 -0800 Subject: IB/qib,staging/rdma/hfi1: use setup_timer api Replace the timer API's to initialize a timer & then assign the callback function by the setup_timer() API. Signed-off-by: Hari Prasath Gujulan Elango Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4158362..2abe31d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1602,9 +1602,7 @@ int qib_register_ib_device(struct qib_devdata *dd) init_ibport(ppd + i); /* Only need to initialize non-zero fields. */ - init_timer(&dev->mem_timer); - dev->mem_timer.function = mem_timer; - dev->mem_timer.data = (unsigned long) dev; + setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); qpt_mask = dd->qpn_mask; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 0ee6b1d..35f6d92 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1455,9 +1455,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* Only need to initialize non-zero fields. 
*/ - init_timer(&dev->mem_timer); - dev->mem_timer.function = mem_timer; - dev->mem_timer.data = (unsigned long) dev; + setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); seqlock_init(&dev->iowait_lock); INIT_LIST_HEAD(&dev->txwait); -- cgit v0.10.2 From c3838b396b425b4242bfe627bfabefc4c1af56f2 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 9 Feb 2016 14:29:13 -0800 Subject: staging/rdma/hfi1: Fetch platform configuration data from EFI variable The platform configuration data has been moved into the EFI variable store where it is populated by the HFI1 option ROM. This patch pulls the configuration data from the new location, retaining a fallback to request_firmware. Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 77b07c3..4750e3c 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -14096,6 +14096,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + /* Needs to be called before hfi1_firmware_init */ + get_platform_config(dd); + /* read in firmware */ ret = hfi1_firmware_init(dd); if (ret) diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 0e95f0b..3b041dc 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -617,6 +617,8 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32); #define NUM_PCIE_SERDES 16 /* number of PCIe serdes on the SBus */ extern const u8 pcie_serdes_broadcast[]; extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES]; +extern uint platform_config_load; + /* SBus commands */ #define RESET_SBUS_RECEIVER 0x20 #define WRITE_SBUS_RECEIVER 0x21 diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 0b23e3e..d2ec6c5 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -77,7 +77,13 @@ static uint fw_8051_load = 1; static uint fw_fabric_serdes_load = 1; static uint fw_pcie_serdes_load = 1; static uint fw_sbus_load = 1; -static uint platform_config_load = 1; + +/* + * Access required in platform.c + * Maintains state of whether the platform config was fetched via the + * fallback option + */ +uint platform_config_load; /* Firmware file names get set in hfi1_firmware_init() based on the above */ static char *fw_8051_name; @@ -677,10 +683,15 @@ static int obtain_firmware(struct hfi1_devdata *dd) } /* not in FW_TRY state */ - if (fw_state == FW_FINAL) + if (fw_state == FW_FINAL) { + if (platform_config) { + dd->platform_config.data = platform_config->data; + dd->platform_config.size = platform_config->size; + } goto done; /* already acquired */ - else if (fw_state == FW_ERR) + } else if (fw_state == FW_ERR) { goto done; /* already tried and failed */ + } /* fw_state is FW_EMPTY */ /* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */ @@ -690,8 +701,14 @@ static int obtain_firmware(struct hfi1_devdata *dd) platform_config = NULL; err = request_firmware(&platform_config, platform_config_name, &dd->pcidev->dev); - if (err) + if (err) { platform_config = NULL; + fw_state = FW_ERR; + fw_err = -ENOENT; + goto done; + } + dd->platform_config.data = platform_config->data; + dd->platform_config.size = platform_config->size; } done: @@ -1457,14 +1474,14 @@ int parse_platform_config(struct hfi1_devdata *dd) { struct platform_config_cache *pcfgcache = &dd->pcfg_cache; u32 *ptr = NULL; - u32 
header1 = 0, header2 = 0, magic_num = 0, crc = 0; + u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0; u32 record_idx = 0, table_type = 0, table_length_dwords = 0; - if (platform_config == NULL) { + if (!dd->platform_config.data) { dd_dev_info(dd, "%s: Missing config file\n", __func__); goto bail; } - ptr = (u32 *)platform_config->data; + ptr = (u32 *)dd->platform_config.data; magic_num = *ptr; ptr++; @@ -1473,12 +1490,31 @@ int parse_platform_config(struct hfi1_devdata *dd) goto bail; } - while (ptr < (u32 *)(platform_config->data + platform_config->size)) { + /* Field is file size in DWORDs */ + file_length = (*ptr) * 4; + ptr++; + + if (file_length > dd->platform_config.size) { + dd_dev_info(dd, "%s:File claims to be larger than read size\n", + __func__); + goto bail; + } else if (file_length < dd->platform_config.size) { + dd_dev_info(dd, "%s:File claims to be smaller than read size\n", + __func__); + } + /* exactly equal, perfection */ + + /* + * In both cases where we proceed, using the self-reported file length + * is the safer option + */ + while (ptr < (u32 *)(dd->platform_config.data + file_length)) { header1 = *ptr; header2 = *(ptr + 1); if (header1 != ~header2) { dd_dev_info(dd, "%s: Failed validation at offset %ld\n", - __func__, (ptr - (u32 *)platform_config->data)); + __func__, (ptr - + (u32 *)dd->platform_config.data)); goto bail; } @@ -1520,7 +1556,7 @@ int parse_platform_config(struct hfi1_devdata *dd) dd_dev_info(dd, "%s: Unknown data table %d, offset %ld\n", __func__, table_type, - (ptr - (u32 *)platform_config->data)); + (ptr - (u32 *)dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table = ptr; @@ -1541,9 +1577,10 @@ int parse_platform_config(struct hfi1_devdata *dd) break; default: dd_dev_info(dd, - "%s: Unknown metadata table %d, offset %ld\n", - __func__, table_type, - (ptr - (u32 *)platform_config->data)); + "%s: Unknown meta table %d, offset %ld\n", + __func__, table_type, + (ptr - + (u32 *)dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table_metadata = @@ -1559,7 +1596,9 @@ int parse_platform_config(struct hfi1_devdata *dd) ptr += table_length_dwords; if (crc != *ptr) { dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n", - __func__, (ptr - (u32 *)platform_config->data)); + __func__, (ptr - + (u32 *) + dd->platform_config.data)); goto bail; } /* Jump the CRC DWORD */ @@ -1675,7 +1714,7 @@ int get_platform_config_field(struct hfi1_devdata *dd, } break; case PLATFORM_CONFIG_PORT_TABLE: - /* Port table is 4 DWORDS in META_VERSION 0 */ + /* Port table is 4 DWORDS */ src_ptr = dd->hfi1_id ? 
pcfgcache->config_tables[table_type].table + 4 : pcfgcache->config_tables[table_type].table; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 585485b..702723b 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1028,6 +1028,7 @@ struct hfi1_devdata { u16 irev; /* implementation revision */ u16 dc8051_ver; /* 8051 firmware version */ + struct platform_config platform_config; struct platform_config_cache pcfg_cache; /* control high-level access to qsfp */ struct mutex qsfp_i2c_mutex; diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index fe5e1e5..112cb6c 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -983,6 +983,7 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) idr_remove(&hfi1_unit_table, dd->unit); list_del(&dd->list); spin_unlock_irqrestore(&hfi1_devs_lock, flags); + free_platform_config(dd); hfi1_dbg_ibdev_exit(&dd->verbs_dev); rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index 506a827..0309c52 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -47,7 +47,48 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ + #include "hfi.h" +#include "efivar.h" + +void get_platform_config(struct hfi1_devdata *dd) +{ + int ret = 0; + unsigned long size = 0; + u8 *temp_platform_config = NULL; + + ret = read_hfi1_efi_var(dd, "configuration", &size, + (void **)&temp_platform_config); + if (ret) { + dd_dev_info(dd, + "%s: Failed to get platform config from UEFI, falling back to request firmware\n", + __func__); + /* fall back to request firmware */ + platform_config_load = 1; + goto bail; + } + + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = size; + +bail: + /* exit */; +} + +void free_platform_config(struct hfi1_devdata *dd) +{ + if (!platform_config_load) { + /* + * was loaded from EFI, release memory + * allocated by read_efi_var + */ + kfree(dd->platform_config.data); + } + /* + * else do nothing, dispose_firmware will release + * struct firmware platform_config on driver exit + */ +} int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) { @@ -739,8 +780,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) /* Skip the tuning for testing (loopback != none) and simulations */ if (loopback != LOOPBACK_NONE || - ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR || - !dd->pcfg_cache.cache_valid) { + ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { ppd->driver_link_ready = 1; return; } @@ -805,6 +845,12 @@ void tune_serdes(struct hfi1_pportdata *ppd) &rx_preset_index, &tuning_method, &total_atten); + + /* + * We may have modified the QSFP memory, so + * update the cache to reflect the changes + */ + refresh_qsfp_cache(ppd, &ppd->qsfp_info); if (ret) goto bail; } else { @@ -820,7 +866,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) break; default: dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); - break; + goto bail; } if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) @@ -828,10 +874,8 @@ void tune_serdes(struct hfi1_pportdata *ppd) total_atten, ppd->qsfp_info.limiting_active); - if (ppd->port_type == PORT_TYPE_QSFP) - refresh_qsfp_cache(ppd, &ppd->qsfp_info); - - ppd->driver_link_ready = 1; + if (!ret) + ppd->driver_link_ready = 1; return; bail: diff --git a/drivers/staging/rdma/hfi1/platform.h 
b/drivers/staging/rdma/hfi1/platform.h index 5b53d71..cc280cc 100644 --- a/drivers/staging/rdma/hfi1/platform.h +++ b/drivers/staging/rdma/hfi1/platform.h @@ -150,6 +150,11 @@ enum platform_config_variable_settings_table_fields { VARIABLE_SETTINGS_TABLE_MAX }; +struct platform_config { + size_t size; + const u8 *data; +}; + struct platform_config_data { u32 *table; u32 *table_metadata; @@ -293,6 +298,10 @@ enum link_tuning_encoding { OPA_UNKNOWN_TUNING }; +/* platform.c */ +void get_platform_config(struct hfi1_devdata *dd); +void free_platform_config(struct hfi1_devdata *dd); int set_qsfp_tx(struct hfi1_pportdata *ppd, int on); void tune_serdes(struct hfi1_pportdata *ppd); + #endif /*__PLATFORM_H*/ -- cgit v0.10.2 From 97167e8134150eb5104e19fd7208e3ac3525f48b Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 9 Feb 2016 14:29:22 -0800 Subject: staging/rdma/hfi1: Tune for unknown channel if configuration file is absent Currently, the driver fails to tune the SerDes and therefore prevents link up if the configuration file is missing or fails parsing or validation. This patch adds a fallback option so that the 8051 is asked to tune for an unknown channel and possibly get the link up if tuning succeeds. It also adds a user-friendly message to update the configuration file if it is out-of-date. Reviewed-by: Mike Marciniszyn Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 3b041dc..b86c220 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -390,6 +390,10 @@ #define LINK_QUALITY_INFO 0x14 #define REMOTE_DEVICE_ID 0x15 +/* 8051 lane specific register field IDs */ +#define TX_EQ_SETTINGS 0x00 +#define CHANNEL_LOSS_SETTINGS 0x05 + /* Lane ID for general configuration registers */ #define GENERAL_CONFIG 4 diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index d2ec6c5..35084b7 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -703,8 +703,6 @@ static int obtain_firmware(struct hfi1_devdata *dd) &dd->pcidev->dev); if (err) { platform_config = NULL; - fw_state = FW_ERR; - fw_err = -ENOENT; goto done; } dd->platform_config.data = platform_config->data; @@ -1470,12 +1468,51 @@ int hfi1_firmware_init(struct hfi1_devdata *dd) return obtain_firmware(dd); } +/* + * This function is a helper function for parse_platform_config(...) and + * does not check for validity of the platform configuration cache + * (because we know it is invalid as we are building up the cache). 
+ * As such, this should not be called from anywhere other than + * parse_platform_config + */ +static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table) +{ + u32 meta_ver, meta_ver_meta, ver_start, ver_len, mask; + struct platform_config_cache *pcfgcache = &dd->pcfg_cache; + + if (!system_table) + return -EINVAL; + + meta_ver_meta = + *(pcfgcache->config_tables[PLATFORM_CONFIG_SYSTEM_TABLE].table_metadata + + SYSTEM_TABLE_META_VERSION); + + mask = ((1 << METADATA_TABLE_FIELD_START_LEN_BITS) - 1); + ver_start = meta_ver_meta & mask; + + meta_ver_meta >>= METADATA_TABLE_FIELD_LEN_SHIFT; + + mask = ((1 << METADATA_TABLE_FIELD_LEN_LEN_BITS) - 1); + ver_len = meta_ver_meta & mask; + + ver_start /= 8; + meta_ver = *((u8 *)system_table + ver_start) & ((1 << ver_len) - 1); + + if (meta_ver < 5) { + dd_dev_info( + dd, "%s:Please update platform config\n", __func__); + return -EINVAL; + } + return 0; +} + int parse_platform_config(struct hfi1_devdata *dd) { struct platform_config_cache *pcfgcache = &dd->pcfg_cache; u32 *ptr = NULL; u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0; u32 record_idx = 0, table_type = 0, table_length_dwords = 0; + int ret = -EINVAL; /* assume failure */ if (!dd->platform_config.data) { dd_dev_info(dd, "%s: Missing config file\n", __func__); @@ -1499,7 +1536,8 @@ int parse_platform_config(struct hfi1_devdata *dd) __func__); goto bail; } else if (file_length < dd->platform_config.size) { - dd_dev_info(dd, "%s:File claims to be smaller than read size\n", + dd_dev_info(dd, + "%s:File claims to be smaller than read size, continuing\n", __func__); } /* exactly equal, perfection */ @@ -1537,6 +1575,9 @@ int parse_platform_config(struct hfi1_devdata *dd) case PLATFORM_CONFIG_SYSTEM_TABLE: pcfgcache->config_tables[table_type].num_table = 1; + ret = check_meta_version(dd, ptr); + if (ret) + goto bail; break; case PLATFORM_CONFIG_PORT_TABLE: pcfgcache->config_tables[table_type].num_table = @@ -1609,7 +1650,7 @@ int parse_platform_config(struct hfi1_devdata *dd) return 0; bail: memset(pcfgcache, 0, sizeof(struct platform_config_cache)); - return -EINVAL; + return ret; } static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index 0309c52..2f07bec 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -498,14 +498,14 @@ static void apply_rx_amplitude_settings( #define OPA_INVALID_INDEX 0xFFF -static void apply_tx_lanes(struct hfi1_pportdata *ppd, u32 config_data, - const char *message) +static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, + u32 config_data, const char *message) { u8 i; int ret = HCMD_SUCCESS; for (i = 0; i < 4; i++) { - ret = load_8051_config(ppd->dd, 0, i, config_data); + ret = load_8051_config(ppd->dd, field_id, i, config_data); if (ret != HCMD_SUCCESS) { dd_dev_err( ppd->dd, @@ -524,6 +524,7 @@ static void apply_tunings( u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0; u8 *cache = ppd->qsfp_info.cache; + /* Enable external device config if channel is limiting active */ read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, &config_data); config_data |= limiting_active; @@ -536,6 +537,7 @@ static void apply_tunings( __func__); config_data = 0; /* re-init */ + /* Pass tuning method to 8051 */ read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, &config_data); config_data |= tuning_method; @@ -545,47 +547,39 @@ static 
void apply_tunings( dd_dev_err(ppd->dd, "%s: Failed to set tuning method\n", __func__); - external_device_config = - ((cache[QSFP_MOD_PWR_OFFS] & 0x4) << 3) | - ((cache[QSFP_MOD_PWR_OFFS] & 0x8) << 2) | - ((cache[QSFP_EQ_INFO_OFFS] & 0x2) << 1) | - (cache[QSFP_EQ_INFO_OFFS] & 0x4); - - config_data = 0; /* re-init */ - read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, - &config_data); - config_data |= (external_device_config << 24); - ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, - config_data); - if (ret != HCMD_SUCCESS) - dd_dev_err( - ppd->dd, - "%s: Failed to set external device config parameters\n", - __func__); - - config_data = 0; /* re-init */ - read_8051_config(ppd->dd, TX_SETTINGS, GENERAL_CONFIG, &config_data); - if ((ppd->link_speed_supported & OPA_LINK_SPEED_25G) && - (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)) - config_data |= 0x02; - if ((ppd->link_speed_supported & OPA_LINK_SPEED_12_5G) && - (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)) - config_data |= 0x01; - ret = load_8051_config(ppd->dd, TX_SETTINGS, GENERAL_CONFIG, - config_data); - if (ret != HCMD_SUCCESS) - dd_dev_err( - ppd->dd, - "%s: Failed to set external device config parameters\n", - __func__); - - config_data = (total_atten << 8) | (total_atten); - - apply_tx_lanes(ppd, config_data, "Setting channel loss"); + /* Set same channel loss for both TX and RX */ + config_data = 0 | (total_atten << 16) | (total_atten << 24); + apply_tx_lanes(ppd, CHANNEL_LOSS_SETTINGS, config_data, + "Setting channel loss"); + + /* Inform 8051 of cable capabilities */ + if (ppd->qsfp_info.cache_valid) { + external_device_config = + ((cache[QSFP_MOD_PWR_OFFS] & 0x4) << 3) | + ((cache[QSFP_MOD_PWR_OFFS] & 0x8) << 2) | + ((cache[QSFP_EQ_INFO_OFFS] & 0x2) << 1) | + (cache[QSFP_EQ_INFO_OFFS] & 0x4); + ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, + GENERAL_CONFIG, &config_data); + /* Clear, then set the external device config field */ + config_data &= ~(0xFF << 24); + config_data |= (external_device_config << 24); + ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, + GENERAL_CONFIG, config_data); + if (ret != HCMD_SUCCESS) + dd_dev_info(ppd->dd, + "%s: Failed set ext device config params\n", + __func__); + } - if (tx_preset_index == OPA_INVALID_INDEX) + if (tx_preset_index == OPA_INVALID_INDEX) { + if (ppd->port_type == PORT_TYPE_QSFP && limiting_active) + dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n", + __func__); return; + } + /* Following for limiting active channels only */ get_platform_config_field( ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index, TX_PRESET_TABLE_PRECUR, &tx_preset, 4); @@ -603,7 +597,8 @@ static void apply_tunings( config_data = precur | (attn << 8) | (postcur << 16); - apply_tx_lanes(ppd, config_data, "Applying TX settings"); + apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data, + "Applying TX settings"); } static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, @@ -766,7 +761,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) u32 total_atten = 0; u32 remote_atten = 0, platform_atten = 0; u32 rx_preset_index, tx_preset_index; - u8 tuning_method = 0; + u8 tuning_method = 0, limiting_active = 0; struct hfi1_devdata *dd = ppd->dd; rx_preset_index = OPA_INVALID_INDEX; @@ -789,7 +784,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) PORT_TABLE_PORT_TYPE, &ppd->port_type, 4); if (ret) - goto bail; + ppd->port_type = PORT_TYPE_UNKNOWN; switch (ppd->port_type) { case PORT_TYPE_DISCONNECTED: @@ -853,6 +848,9 @@ void 
tune_serdes(struct hfi1_pportdata *ppd) refresh_qsfp_cache(ppd, &ppd->qsfp_info); if (ret) goto bail; + + limiting_active = + ppd->qsfp_info.limiting_active; } else { dd_dev_err(dd, "%s: Reading QSFP memory failed\n", @@ -866,13 +864,18 @@ void tune_serdes(struct hfi1_pportdata *ppd) break; default: dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); - goto bail; + ppd->port_type = PORT_TYPE_UNKNOWN; + tuning_method = OPA_UNKNOWN_TUNING; + total_atten = 0; + limiting_active = 0; + tx_preset_index = OPA_INVALID_INDEX; + break; } + if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) apply_tunings(ppd, tx_preset_index, tuning_method, - total_atten, - ppd->qsfp_info.limiting_active); + total_atten, limiting_active); if (!ret) ppd->driver_link_ready = 1; diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index c9d1e64..42e5be4 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -344,7 +344,6 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) ppd->qsfp_info.cache_valid = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); - dd_dev_info(ppd->dd, "%s called\n", __func__); if (!qsfp_mod_present(ppd)) { ret = -ENODEV; goto bail; -- cgit v0.10.2 From 34cee28f0bb067f4210271c4d7c4febe34bad2d3 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 9 Feb 2016 14:29:31 -0800 Subject: staging/rdma/hfi1: actually use new RNR timer API in loopback path The patch series which added a new API for the RNR timer did not include an updated call in the loopback path. RC/UC RNR loopback would be broken without this. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index e54e0b4..ba2a2cc 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -81,7 +81,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) * * add an rnr timer on the QP */ -static inline void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) +void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) { struct hfi1_qp_priv *priv = qp->priv; diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 66449ac..a7add3c 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -371,6 +371,7 @@ static void ruc_loopback(struct rvt_qp *sqp) int release; int ret; int copy_last = 0; + u32 to; rcu_read_lock(); @@ -600,11 +601,8 @@ rnr_nak: spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK)) goto clr_busy; - sqp->s_flags |= RVT_S_WAIT_RNR; - sqp->s_timer.function = hfi1_rc_rnr_retry; - sqp->s_timer.expires = jiffies + - usecs_to_jiffies(ib_hfi1_rnr_table[qp->r_min_rnr_timer]); - add_timer(&sqp->s_timer); + to = ib_hfi1_rnr_table[qp->r_min_rnr_timer]; + hfi1_add_rnr_timer(sqp, to); goto clr_busy; op_err: diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 26eda8a..adb63bb 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -404,6 +404,7 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); void hfi1_rc_rnr_retry(unsigned long arg); +void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to); void hfi1_rc_timeout(unsigned long arg); void hfi1_del_timers_sync(struct rvt_qp *qp); void hfi1_stop_rc_timers(struct rvt_qp *qp); -- cgit v0.10.2 
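[Editor's sketch] Taken together, the timer patches above leave the RC retry timer on qp->s_timer (its handler, hfi1_rc_timeout, is now assigned once at QP init) and move the RNR timer onto the driver-private priv->s_rnr_timer, with both torn down in hfi1_del_timers_sync(). Below is a minimal, hedged sketch of that split using the same pre-4.15 kernel timer API (setup_timer/add_timer/del_timer_sync); the my_qp/my_qp_priv structs, MY_S_* flags and my_* callbacks are simplified stand-ins for rvt_qp/hfi1_qp_priv and are not part of the driver.

#include <linux/types.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

/*
 * Illustrative only: mirrors the hfi1 pattern of one fixed-purpose
 * timer_list per timer instead of swapping s_timer.function per arm.
 */
struct my_qp_priv {
	struct timer_list s_rnr_timer;		/* RNR retry timer (driver private) */
};

struct my_qp {
	unsigned long s_flags;
	unsigned long timeout_jiffies;		/* 4.096 usec * (1 << timeout) */
	struct timer_list s_timer;		/* retry (missing response) timer */
	struct my_qp_priv *priv;
};

#define MY_S_TIMER	0x01
#define MY_S_WAIT_RNR	0x02

static void my_rc_timeout(unsigned long arg)	{ /* restart/resend path */ }
static void my_rc_rnr_retry(unsigned long arg)	{ /* reschedule the send engine */ }

static void my_init_timers(struct my_qp *qp)
{
	/* handlers are fixed once at QP init, never reassigned when arming */
	setup_timer(&qp->s_timer, my_rc_timeout, (unsigned long)qp);
	setup_timer(&qp->priv->s_rnr_timer, my_rc_rnr_retry, (unsigned long)qp);
}

static void my_add_retry_timer(struct my_qp *qp)
{
	qp->s_flags |= MY_S_TIMER;
	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
	add_timer(&qp->s_timer);
}

static void my_add_rnr_timer(struct my_qp *qp, u32 usec)
{
	qp->s_flags |= MY_S_WAIT_RNR;
	qp->priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(usec);
	add_timer(&qp->priv->s_rnr_timer);
}

static void my_del_timers_sync(struct my_qp *qp)
{
	/* e.g. from stop_send_queue(): quiesce both timers before draining */
	del_timer_sync(&qp->s_timer);
	del_timer_sync(&qp->priv->s_rnr_timer);
}

Keeping two timer_list objects with fixed handlers avoids reassigning s_timer.function on every arm (the bug class the loopback-path fix above closes) and lets the stop/quiesce paths tear both timers down synchronously in one place.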
From 773d0451685b96f157ccd617a5e9cd9d3fa7d986 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Tue, 9 Feb 2016 14:29:40 -0800 Subject: staging/rdma/hfi1: Turning off LED without checking if stepping is Ax It prevents the LED from staying on when the QSFP module is not present. Reviewed-by: Easwar Hariharan Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 4750e3c..1294617 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13418,8 +13418,8 @@ static void init_chip(struct hfi1_devdata *dd) write_csr(dd, CCE_DC_CTRL, 0); /* Set the LED off */ - if (is_ax(dd)) - setextled(dd, 0); + setextled(dd, 0); + /* * Clear the QSFP reset. * An FLR enforces a 0 on all out pins. The driver does not touch diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 5642d85..04f2d8a3 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -1261,8 +1261,7 @@ retry: write_csr(dd, CCE_DC_CTRL, 0); /* Set the LED off */ - if (is_ax(dd)) - setextled(dd, 0); + setextled(dd, 0); /* check for any per-lane errors */ pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, ®32); -- cgit v0.10.2 From bfee5e32e701b98634b380a9eef8b5820feb7488 Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Tue, 9 Feb 2016 14:29:49 -0800 Subject: IB/rdmavt, staging/rdma/hfi1: use qps to dynamically scale timeout value A busy_jiffies variable is maintained and updated when rc qps are created and deleted. busy_jiffies is a scaled value of the number of rc qps in the device. busy_jiffies is incremented every rc qp scaling interval. busy_jiffies is added to the rc timeout in add_retry_timer and mod_retry_timer. The rc qp scaling interval is selected based on extensive performance evaluation of targeted workloads. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Vennila Megavannan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 439213c..7dc837c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -685,6 +685,19 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } rdi->n_qps_allocated++; + /* + * Maintain a busy_jiffies variable that will be added to the timeout + * period in mod_retry_timer and add_retry_timer. This busy jiffies + * is scaled by the number of rc qps created for the device to reduce + * the number of timeouts occurring when there is a large number of + * qps. busy_jiffies is incremented every rc qp scaling interval. + * The scaling interval is selected based on extensive performance + * evaluation of targeted workloads. 
+ */ + if (init_attr->qp_type == IB_QPT_RC) { + rdi->n_rc_qps++; + rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL; + } spin_unlock(&rdi->n_qps_lock); if (qp->ip) { @@ -1223,6 +1236,10 @@ int rvt_destroy_qp(struct ib_qp *ibqp) spin_lock(&rdi->n_qps_lock); rdi->n_qps_allocated--; + if (qp->ibqp.qp_type == IB_QPT_RC) { + rdi->n_rc_qps--; + rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL; + } spin_unlock(&rdi->n_qps_lock); if (qp->ip) diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index ba2a2cc..a4a44d3 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -68,9 +68,13 @@ */ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) { + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ - qp->s_timer.expires = jiffies + qp->timeout_jiffies; + qp->s_timer.expires = jiffies + qp->timeout_jiffies + + rdi->busy_jiffies; add_timer(&qp->s_timer); } @@ -99,9 +103,13 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) */ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) { + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ - mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + + rdi->busy_jiffies); } /** diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4242fea..5ccf683 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -318,7 +318,9 @@ struct rvt_dev_info { /* QP */ struct rvt_qp_ibdev *qp_dev; u32 n_qps_allocated; /* number of QPs allocated for device */ - spinlock_t n_qps_lock; /* keep track of number of qps */ + u32 n_rc_qps; /* number of RC QPs allocated for device */ + u32 busy_jiffies; /* timeout scaling based on RC QP count */ + spinlock_t n_qps_lock; /* protect qps, rc qps and busy jiffy counts */ /* memory maps */ struct list_head pending_mmaps; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index aed13e1..b3ea745 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -225,6 +225,8 @@ struct rvt_ack_entry { }; }; +#define RC_QP_SCALING_INTERVAL 5 + /* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). -- cgit v0.10.2 From 20f333b61300fa658952713ca9b8b4b72bbaed9f Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sun, 14 Feb 2016 12:09:55 -0800 Subject: IB/qib: Rename several functions by adding a "qib_" prefix This would avoid conflict with the functions in hfi1 that have similar names when both qib and hfi1 drivers are configured to be built into the kernel. This issue came up in the 0-day build report. 
Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 5ba073e..bbf0a16 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1540,13 +1540,13 @@ void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t lmsg); -void stop_send_queue(struct rvt_qp *qp); -void quiesce_qp(struct rvt_qp *qp); -void flush_qp_waiters(struct rvt_qp *qp); -int mtu_to_path_mtu(u32 mtu); -u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); -void notify_error_qp(struct rvt_qp *qp); -int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, - struct ib_qp_attr *attr); +void qib_stop_send_queue(struct rvt_qp *qp); +void qib_quiesce_qp(struct rvt_qp *qp); +void qib_flush_qp_waiters(struct rvt_qp *qp); +int qib_mtu_to_path_mtu(u32 mtu); +u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); +void qib_notify_error_qp(struct rvt_qp *qp); +int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); #endif /* _QIB_KERNEL_H */ diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 787116f..01d49dc 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -127,8 +127,8 @@ static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, * Allocate the next available QPN or * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ -int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp) +int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; @@ -232,14 +232,14 @@ unsigned qib_free_all_qps(struct rvt_dev_info *rdi) return qp_inuse; } -void notify_qp_reset(struct rvt_qp *qp) +void qib_notify_qp_reset(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; atomic_set(&priv->s_dma_busy, 0); } -void notify_error_qp(struct rvt_qp *qp) +void qib_notify_error_qp(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); @@ -290,8 +290,8 @@ static int mtu_to_enum(u32 mtu) return enum_mtu; } -int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, - struct ib_qp_attr *attr) +int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr) { int mtu, pmtu, pidx = qp->port_num - 1; struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi); @@ -308,12 +308,12 @@ int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, return pmtu; } -int mtu_to_path_mtu(u32 mtu) +int qib_mtu_to_path_mtu(u32 mtu) { return mtu_to_enum(mtu); } -u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) +u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) { return ib_mtu_enum_to_int(pmtu); } @@ -378,7 +378,7 @@ __be32 qib_compute_aeth(struct rvt_qp *qp) return cpu_to_be32(aeth); } -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp) +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp) { struct qib_qp_priv *priv; @@ -399,7 +399,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp) return priv; } -void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) +void 
qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; @@ -407,7 +407,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) kfree(priv); } -void stop_send_queue(struct rvt_qp *qp) +void qib_stop_send_queue(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; @@ -415,7 +415,7 @@ void stop_send_queue(struct rvt_qp *qp) del_timer_sync(&qp->s_timer); } -void quiesce_qp(struct rvt_qp *qp) +void qib_quiesce_qp(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; @@ -426,7 +426,7 @@ void quiesce_qp(struct rvt_qp *qp) } } -void flush_qp_waiters(struct rvt_qp *qp) +void qib_flush_qp_waiters(struct rvt_qp *qp) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 2abe31d..fa94f78 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1394,7 +1394,7 @@ bail: return ret; } -static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num) +static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num) { struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); struct qib_devdata *dd = dd_from_dev(ibdev); @@ -1663,22 +1663,22 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; - dd->verbs_dev.rdi.driver_f.alloc_qpn = alloc_qpn; - dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; - dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; + dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn; + dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc; + dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps; - dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; + dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset; dd->verbs_dev.rdi.driver_f.do_send = qib_do_send; dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send; - dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp; - dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue; - dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; - dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; - dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; - dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp; - dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; + dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp; + dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue; + dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters; + dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp; + dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu; + dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp; + dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr; dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port; - dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port; + dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port; dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg; dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index cd73a97..b88e027 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h 
+++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -350,11 +350,11 @@ __be32 qib_compute_aeth(struct rvt_qp *qp); * Functions provided by qib driver for rdmavt to use */ unsigned qib_free_all_qps(struct rvt_dev_info *rdi); -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); -void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); -void notify_qp_reset(struct rvt_qp *qp); -int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp); +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); +void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); +void qib_notify_qp_reset(struct rvt_qp *qp); +int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port, gfp_t gfp); #ifdef CONFIG_DEBUG_FS -- cgit v0.10.2 From 46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:10:04 -0800 Subject: IB/qib, staging/rdma/hfi1: add s_hlock for use in post send This patch adds an additional lock to reduce contention on the s_lock. This lock is used in post_send() so that the post_send is not serialized with the send engine and other send related processing. To do this the s_next_psn is now maintained on post_send() while post_send() related fields are moved to a new cache line. There is an s_avail maintained for the post_send() to mitigate trading cache lines with the send engine. The lock is released/acquired around releasing the just built packet to the egress mechanism. Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Harish Chegondi Signed-off-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 01d49dc..6ffa022 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -474,6 +474,42 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth) } } +/** + * qib_check_send_wqe - validate wr/wqe + * @qp - The qp + * @wqe - The built wqe + * + * validate wr/wqe. This is called + * prior to inserting the wqe into + * the ring but after the wqe has been + * setup. + * + * Returns 0 on success, -EINVAL on failure + */ +int qib_check_send_wqe(struct rvt_qp *qp, + struct rvt_swqe *wqe) +{ + struct rvt_ah *ah; + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + if (wqe->length > 0x80000000U) + return -EINVAL; + break; + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + break; + default: + break; + } + return 0; +} + #ifdef CONFIG_DEBUG_FS struct qib_qp_iter { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index ce886b2..9088e26 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -226,6 +226,8 @@ bail: * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. 
*/ int qib_make_rc_req(struct rvt_qp *qp) @@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - unsigned long flags; int ret = 0; int delta; @@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &priv->s_hdr->u.l.oth; - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout resends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && qib_make_rc_ack(dev, qp, ohdr, pmtu)) @@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } - wqe->psn = qp->s_next_psn; newreq = 1; + qp->s_psn = wqe->psn; } /* * Note that we have to be careful not to modify the @@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(SEND_FIRST); len = pmtu; break; @@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp) cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(RDMA_WRITE_FIRST); len = pmtu; break; @@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. 
- */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = @@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - wqe->lpsn = wqe->psn; } if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); @@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp) } if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_psn = wqe->lpsn + 1; - else { + else qp->s_psn++; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } break; case OP(RDMA_READ_RESPONSE_FIRST): @@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = qp->s_psn++ & QIB_PSN_MASK; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = qp->s_psn++ & QIB_PSN_MASK; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_cur_size = len; qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, goto ack_done; /* Ignore invalid responses. */ - if (qib_cmp24(psn, qp->s_next_psn) >= 0) + smp_read_barrier_depends(); /* see post_one_send */ + if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0) goto ack_done; /* Ignore duplicate responses. */ diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 2623684..a5f07a6 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -391,7 +391,8 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) sqp->s_flags |= RVT_S_BUSY; again: - if (sqp->s_last == sqp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); @@ -765,22 +766,24 @@ void qib_do_send(struct rvt_qp *qp) qp->s_flags |= RVT_S_BUSY; - spin_unlock_irqrestore(&qp->s_lock, flags); - do { /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { + spin_unlock_irqrestore(&qp->s_lock, flags); /* * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) - break; + return; /* Record that s_hdr is empty. */ qp->s_hdrwords = 0; + spin_lock_irqsave(&qp->s_lock, flags); } } while (make_req(qp)); + + spin_unlock_irqrestore(&qp->s_lock, flags); } /* diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 1b2fc69..7bdbc79 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -41,6 +41,8 @@ * qib_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. 
*/ int qib_make_uc_req(struct rvt_qp *qp) @@ -48,20 +50,18 @@ int qib_make_uc_req(struct rvt_qp *qp) struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; struct rvt_swqe *wqe; - unsigned long flags; u32 hwords; u32 bth0; u32 len; u32 pmtu = qp->pmtu; int ret = 0; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -90,13 +90,13 @@ int qib_make_uc_req(struct rvt_qp *qp) RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. */ - if (qp->s_cur == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; /* * Start a new request. */ - wqe->psn = qp->s_next_psn; - qp->s_psn = qp->s_next_psn; + qp->s_psn = wqe->psn; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; qp->s_sge.num_sge = wqe->wr.num_sge; @@ -215,15 +215,11 @@ int qib_make_uc_req(struct rvt_qp *qp) qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), - qp->s_next_psn++ & QIB_PSN_MASK); + qp->s_psn++ & QIB_PSN_MASK); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index fe49172..d950213 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -234,6 +234,8 @@ drop: * qib_make_ud_req - construct a UD request packet * @qp: the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int qib_make_ud_req(struct rvt_qp *qp) @@ -244,7 +246,6 @@ int qib_make_ud_req(struct rvt_qp *qp) struct qib_pportdata *ppd; struct qib_ibport *ibp; struct rvt_swqe *wqe; - unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -253,13 +254,12 @@ int qib_make_ud_req(struct rvt_qp *qp) int ret = 0; int next_cur; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. 
*/ if (atomic_read(&priv->s_dma_busy)) { @@ -271,7 +271,9 @@ int qib_make_ud_req(struct rvt_qp *qp) goto done; } - if (qp->s_cur == qp->s_head) + /* see post_one_send() */ + smp_read_barrier_depends(); + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -292,6 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp) this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { + unsigned long flags; /* * If DMAs are in progress, we can't generate * a completion for the loopback packet since @@ -304,6 +307,7 @@ int qib_make_ud_req(struct rvt_qp *qp) goto bail; } qp->s_cur = next_cur; + local_irq_save(flags); spin_unlock_irqrestore(&qp->s_lock, flags); qib_ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -378,7 +382,7 @@ int qib_make_ud_req(struct rvt_qp *qp) ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ? cpu_to_be32(QIB_MULTICAST_QPN) : cpu_to_be32(wqe->ud_wr.remote_qpn); - ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); + ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). @@ -388,13 +392,9 @@ int qib_make_ud_req(struct rvt_qp *qp) ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fa94f78..5cf019f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1662,6 +1662,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; + dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe; dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc; @@ -1677,6 +1678,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr; + dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send; dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port; dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port; dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg; @@ -1778,17 +1780,34 @@ void qib_unregister_ib_device(struct qib_devdata *dd) dev->pio_hdrs, dev->pio_hdrs_phys); } -/* - * This must be called with s_lock held. +/** + * _qib_schedule_send - schedule progress + * @qp - the qp + * + * This schedules progress w/o regard to the s_flags. + * + * It is only used in post send, which doesn't hold + * the s_lock. 
*/ -void qib_schedule_send(struct rvt_qp *qp) +void _qib_schedule_send(struct rvt_qp *qp) { + struct qib_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct qib_qp_priv *priv = qp->priv; - if (qib_send_ok(qp)) { - struct qib_ibport *ibp = - to_iport(qp->ibqp.device, qp->port_num); - struct qib_pportdata *ppd = ppd_from_ibp(ibp); - queue_work(ppd->qib_wq, &priv->s_work); - } + queue_work(ppd->qib_wq, &priv->s_work); +} + +/** + * qib_schedule_send - schedule progress + * @qp - the qp + * + * This schedules qp progress. The s_lock + * should be held. + */ +void qib_schedule_send(struct rvt_qp *qp) +{ + if (qib_send_ok(qp)) + _qib_schedule_send(qp); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index b88e027..d137d71 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -298,9 +298,7 @@ static inline int qib_send_ok(struct rvt_qp *qp) !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); } -/* - * This must be called with s_lock held. - */ +void _qib_schedule_send(struct rvt_qp *qp); void qib_schedule_send(struct rvt_qp *qp); static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) @@ -392,6 +390,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); + struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); void qib_rc_rnr_retry(unsigned long arg); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7dc837c..522404a 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -401,6 +401,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, rdi->driver_f.flush_qp_waiters(qp); qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); /* Stop the send queue and the retry timer */ @@ -415,6 +416,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /* grab the lock b/c it was locked at call time */ spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); rvt_clear_mr_refs(qp, 1); @@ -610,6 +612,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * except for qp->ibqp.qp_num. */ spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_hlock); spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); @@ -620,6 +623,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_avail = init_attr->cap.max_send_wr; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; @@ -779,6 +783,7 @@ void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; + smp_wmb(); /* see qp_set_savail */ } if (qp->s_rdma_mr) { rvt_put_mr(qp->s_rdma_mr); @@ -833,7 +838,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) rdi->driver_f.notify_error_qp(qp); /* Schedule the sending tasklet to drain the send work queue. 
*/ - if (qp->s_last != qp->s_head) + if (ACCESS_ONCE(qp->s_last) != qp->s_head) rdi->driver_f.schedule_send(qp); rvt_clear_mr_refs(qp, 0); @@ -979,6 +984,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, link = rdma_port_get_link_layer(ibqp->device, qp->port_num); spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? @@ -1151,6 +1157,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PATH_MTU) { qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu); qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); + qp->log_pmtu = ilog2(qp->pmtu); } if (attr_mask & IB_QP_RETRY_CNT) { @@ -1186,6 +1193,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, rdi->driver_f.modify_qp(qp, attr, attr_mask, udata); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) @@ -1207,6 +1215,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, inval: spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); return -EINVAL; } @@ -1226,9 +1235,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp) struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); rvt_reset_qp(rdi, qp, ibqp->qp_type); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); /* qpn is now available for use again */ @@ -1358,6 +1369,28 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, } /** + * qp_get_savail - return number of avail send entries + * + * @qp - the qp + * + * This assumes the s_hlock is held but the s_last + * qp variable is uncontrolled. + */ +static inline u32 qp_get_savail(struct rvt_qp *qp) +{ + u32 slast; + u32 ret; + + smp_read_barrier_depends(); /* see rc.c */ + slast = ACCESS_ONCE(qp->s_last); + if (qp->s_head >= slast) + ret = qp->s_size - (qp->s_head - slast); + else + ret = slast - qp->s_head; + return ret - 1; +} + +/** * rvt_post_one_wr - post one RC, UC, or UD send work request * @qp: the QP to post on * @wr: the work request to send @@ -1372,6 +1405,8 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) struct rvt_lkey_table *rkt; struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + u8 log_pmtu; + int ret; /* IB spec says that num_sge == 0 is OK. 
*/ if (unlikely(wr->num_sge > qp->s_max_sge)) @@ -1403,16 +1438,16 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { return -EINVAL; } - + /* check for avail */ + if (unlikely(!qp->s_avail)) { + qp->s_avail = qp_get_savail(qp); + WARN_ON(qp->s_avail > (qp->s_size - 1)); + if (!qp->s_avail) + return -ENOMEM; + } next = qp->s_head + 1; if (next >= qp->s_size) next = 0; - if (next == qp->s_last) - return -ENOMEM; - - if (rdi->driver_f.check_send_wr && - rdi->driver_f.check_send_wr(qp, wr)) - return -EINVAL; rkt = &rdi->lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.pd); @@ -1444,21 +1479,39 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) continue; ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], &wr->sg_list[i], acc); - if (!ok) + if (!ok) { + ret = -EINVAL; goto bail_inval_free; + } wqe->length += length; j++; } wqe->wr.num_sge = j; } - if (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) { - if (wqe->length > 0x80000000U) + + /* general part of wqe valid - allow for driver checks */ + if (rdi->driver_f.check_send_wqe) { + ret = rdi->driver_f.check_send_wqe(qp, wqe); + if (ret) goto bail_inval_free; - } else { + } + + log_pmtu = qp->log_pmtu; + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) { + struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); + + log_pmtu = ah->log_pmtu; atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); } + wqe->ssn = qp->s_ssn++; + wqe->psn = qp->s_next_psn; + wqe->lpsn = wqe->psn + + (wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0); + qp->s_next_psn = wqe->lpsn + 1; + smp_wmb(); /* see request builders */ + qp->s_avail--; qp->s_head = next; return 0; @@ -1470,7 +1523,7 @@ bail_inval_free: rvt_put_mr(sge->mr); } - return -EINVAL; + return ret; } /** @@ -1491,14 +1544,14 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned nreq = 0; int err = 0; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irqsave(&qp->s_hlock, flags); /* * Ensure QP state is such that we can send. If not bail out early, * there is no need to do this every time we post a send. */ if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irqrestore(&qp->s_hlock, flags); return -EINVAL; } @@ -1518,11 +1571,13 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, nreq++; } bail: - if (nreq && !call_send) - rdi->driver_f.schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - if (nreq && call_send) - rdi->driver_f.do_send(qp); + spin_unlock_irqrestore(&qp->s_hlock, flags); + if (nreq) { + if (call_send) + rdi->driver_f.schedule_send_no_lock(qp); + else + rdi->driver_f.do_send(qp); + } return err; } diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index ec9ee72..00866c0 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -226,16 +226,45 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, } } -int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +/** + * hfi1_check_send_wqe - validate wqe + * @qp - The qp + * @wqe - The built wqe + * + * validate wqe. This is called + * prior to inserting the wqe into + * the ring but after the wqe has been + * setup. 
+ * + * Returns 0 on success, -EINVAL on failure + * + */ +int hfi1_check_send_wqe(struct rvt_qp *qp, + struct rvt_swqe *wqe) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct rvt_ah *ah = ibah_to_rvtah(ud_wr(wr)->ah); + struct rvt_ah *ah; - if (qp->ibqp.qp_type != IB_QPT_RC && - qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_SMI && - ibp->sl_to_sc[ah->attr.sl] == 0xf) { - return -EINVAL; + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + if (wqe->length > 0x80000000U) + return -EINVAL; + break; + case IB_QPT_SMI: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + break; + case IB_QPT_GSI: + case IB_QPT_UD: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + if (ibp->sl_to_sc[ah->attr.sl] == 0xf) + return -EINVAL; + default: + break; } return 0; } @@ -302,6 +331,42 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) } /** + * _hfi1_schedule_send - schedule progress + * @qp: the QP + * + * This schedules qp progress w/o regard to the s_flags. + * + * It is only used in the post send, which doesn't hold + * the s_lock. + */ +void _hfi1_schedule_send(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + + iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, + priv->s_sde ? + priv->s_sde->cpu : + cpumask_first(cpumask_of_node(dd->node))); +} + +/** + * hfi1_schedule_send - schedule progress + * @qp: the QP + * + * This schedules qp progress and caller should hold + * the s_lock. + */ +void hfi1_schedule_send(struct rvt_qp *qp) +{ + if (hfi1_send_ok(qp)) + _hfi1_schedule_send(qp); +} + +/** * hfi1_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush * @aeth: the Acknowledge Extended Transport Header diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 973c14b..98827b5 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -137,41 +137,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter); */ void qp_comm_est(struct rvt_qp *qp); -/** - * _hfi1_schedule_send - schedule progress - * @qp: the QP - * - * This schedules qp progress w/o regard to the s_flags. - * - * It is only used in the post send, which doesn't hold - * the s_lock. - */ -static inline void _hfi1_schedule_send(struct rvt_qp *qp) -{ - struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ibport *ibp = - to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); - - iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, - priv->s_sde ? - priv->s_sde->cpu : - cpumask_first(cpumask_of_node(dd->node))); -} - -/** - * hfi1_schedule_send - schedule progress - * @qp: the QP - * - * This schedules qp progress and caller should hold - * the s_lock. 
- */ -static inline void hfi1_schedule_send(struct rvt_qp *qp) -{ - if (hfi1_send_ok(qp)) - _hfi1_schedule_send(qp); -} +void _hfi1_schedule_send(struct rvt_qp *qp); +void hfi1_schedule_send(struct rvt_qp *qp); void hfi1_migrate_qp(struct rvt_qp *qp); diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index a4a44d3..a62c9424 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -367,6 +367,8 @@ bail: * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP * + * Assumes s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int hfi1_make_rc_req(struct rvt_qp *qp) @@ -383,7 +385,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - unsigned long flags; int ret = 0; int middle = 0; int delta; @@ -392,12 +393,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &priv->s_hdr->ibh.u.l.oth; - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout re-sends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && make_rc_ack(dev, qp, ohdr, pmtu)) @@ -407,7 +402,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -463,8 +459,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } - wqe->psn = qp->s_next_psn; newreq = 1; + qp->s_psn = wqe->psn; } /* * Note that we have to be careful not to modify the @@ -483,9 +479,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(SEND_FIRST); len = pmtu; break; @@ -522,9 +516,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(RDMA_WRITE_FIRST); len = pmtu; break; @@ -559,13 +551,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. 
- */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); @@ -596,7 +581,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - wqe->lpsn = wqe->psn; } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); @@ -639,11 +623,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) } if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_psn = wqe->lpsn + 1; - else { + else qp->s_psn++; - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } break; case OP(RDMA_READ_RESPONSE_FIRST): @@ -663,8 +644,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = mask_psn(qp->s_psn++); - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -705,8 +684,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = mask_psn(qp->s_psn++); - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -777,13 +754,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp) bth2, middle); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -1563,7 +1536,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, trace_hfi1_rc_ack(qp, psn); /* Ignore invalid responses. */ - if (cmp_psn(psn, qp->s_next_psn) >= 0) + smp_read_barrier_depends(); /* see post_one_send */ + if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0) goto ack_done; /* Ignore duplicate responses. */ diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index a7add3c..6114550 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -392,7 +392,8 @@ static void ruc_loopback(struct rvt_qp *sqp) sqp->s_flags |= RVT_S_BUSY; again: - if (sqp->s_last == sqp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); @@ -871,40 +872,43 @@ void hfi1_do_send(struct rvt_qp *qp) qp->s_flags |= RVT_S_BUSY; - spin_unlock_irqrestore(&qp->s_lock, flags); - timeout = jiffies + (timeout_int) / 8; cpu = priv->s_sde ? priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); do { /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { + spin_unlock_irqrestore(&qp->s_lock, flags); /* * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ if (hfi1_verbs_send(qp, &ps)) - break; + return; /* Record that s_hdr is empty. 
*/ qp->s_hdrwords = 0; - } - - /* allow other tasks to run */ - if (unlikely(time_after(jiffies, timeout))) { - if (workqueue_congested(cpu, ps.ppd->hfi1_wq)) { - spin_lock_irqsave(&qp->s_lock, flags); - qp->s_flags &= ~RVT_S_BUSY; - hfi1_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, - flags); + /* allow other tasks to run */ + if (unlikely(time_after(jiffies, timeout))) { + if (workqueue_congested(cpu, + ps.ppd->hfi1_wq)) { + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags &= ~RVT_S_BUSY; + hfi1_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, + flags); + this_cpu_inc( + *ps.ppd->dd->send_schedule); + return; + } + cond_resched(); this_cpu_inc(*ps.ppd->dd->send_schedule); - return; + timeout = jiffies + (timeout_int) / 8; } - cond_resched(); - this_cpu_inc(*ps.ppd->dd->send_schedule); - timeout = jiffies + (timeout_int) / 8; + spin_lock_irqsave(&qp->s_lock, flags); } } while (make_req(qp)); + + spin_unlock_irqrestore(&qp->s_lock, flags); } /* diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 0aa604b..f884b5c 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -59,6 +59,8 @@ * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP * + * Assume s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int hfi1_make_uc_req(struct rvt_qp *qp) @@ -66,7 +68,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; struct rvt_swqe *wqe; - unsigned long flags; u32 hwords = 5; u32 bth0 = 0; u32 len; @@ -74,13 +75,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) int ret = 0; int middle = 0; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -106,15 +106,15 @@ int hfi1_make_uc_req(struct rvt_qp *qp) RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. */ - if (qp->s_cur == qp->s_head) { + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) { clear_ahg(qp); goto bail; } /* * Start a new request. */ - wqe->psn = qp->s_next_psn; - qp->s_psn = qp->s_next_psn; + qp->s_psn = wqe->psn; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; qp->s_sge.num_sge = wqe->wr.num_sge; @@ -235,15 +235,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), - mask_psn(qp->s_next_psn++), middle); + mask_psn(qp->s_psn++), middle); done: - ret = 1; - goto unlock; + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index fdf6e3b..ba78e2e 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -261,6 +261,8 @@ drop: * hfi1_make_ud_req - construct a UD request packet * @qp: the QP * + * Assume s_lock is held. + * * Return 1 if constructed; otherwise, return 0. 
*/ int hfi1_make_ud_req(struct rvt_qp *qp) @@ -271,7 +273,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp) struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; struct rvt_swqe *wqe; - unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -281,13 +282,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp) int next_cur; u8 sc5; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -299,7 +299,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp) goto done; } - if (qp->s_cur == qp->s_head) + /* see post_one_send() */ + smp_read_barrier_depends(); + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -317,6 +319,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) if (unlikely(!loopback && (lid == ppd->lid || (lid == be16_to_cpu(IB_LID_PERMISSIVE) && qp->ibqp.qp_type == IB_QPT_GSI)))) { + unsigned long flags; /* * If DMAs are in progress, we can't generate * a completion for the loopback packet since @@ -329,6 +332,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) goto bail; } qp->s_cur = next_cur; + local_irq_save(flags); spin_unlock_irqrestore(&qp->s_lock, flags); ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -408,7 +412,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); - ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++)); + ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). 
@@ -423,13 +427,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp) priv->s_hdr->sde = NULL; done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 35f6d92..1df4648 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1533,6 +1533,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; + dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; @@ -1543,7 +1544,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; - dd->verbs_dev.rdi.driver_f.check_send_wr = hfi1_check_send_wr; + dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; /* completeion queue */ snprintf(dd->verbs_dev.rdi.dparms.cq_name, diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index adb63bb..d00c55d 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -427,7 +427,7 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr); +int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 5ccf683..aabd2e5 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -220,6 +220,7 @@ struct rvt_ah { }; struct rvt_dev_info; +struct rvt_swqe; struct rvt_driver_provided { /* * The work to create port files in /sys/class Infiniband is different @@ -240,6 +241,7 @@ struct rvt_driver_provided { void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp); + void (*schedule_send_no_lock)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp); int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); @@ -273,7 +275,7 @@ struct rvt_driver_provided { void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); - int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr); + int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index b3ea745..1066b5d 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -250,11 +250,12 @@ struct rvt_qp { enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu */ u32 qkey; /* QKEY for this QP (for 
UD or RD) */ u32 s_size; /* send work queue size */ u32 s_ahgpsn; /* set to the psn in the copy of the header */ + u16 pmtu; /* decoded from path_mtu */ + u8 log_pmtu; /* shift for pmtu */ u8 state; /* QP state */ u8 allowed_ops; /* high order bits of allowed opcodes */ u8 qp_access_flags; @@ -299,6 +300,13 @@ struct rvt_qp { struct rvt_sge_state r_sge; /* current receive data */ struct rvt_rq r_rq; /* receive work queue */ + /* post send line */ + spinlock_t s_hlock ____cacheline_aligned_in_smp; + u32 s_head; /* new entries added here */ + u32 s_next_psn; /* PSN for next request */ + u32 s_avail; /* number of entries avail */ + u32 s_ssn; /* SSN of tail entry */ + spinlock_t s_lock ____cacheline_aligned_in_smp; struct rvt_sge_state *s_cur_sge; u32 s_flags; @@ -308,19 +316,16 @@ struct rvt_qp { u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ u32 s_last_psn; /* last response PSN processed */ u32 s_sending_psn; /* lowest PSN that is being sent */ u32 s_sending_hpsn; /* highest PSN that is being sent */ u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ u32 s_tail; /* next entry to process */ u32 s_cur; /* current work queue entry */ u32 s_acked; /* last un-ACK'ed entry */ u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_rdma_ack_cnt; -- cgit v0.10.2 From e16689e49216d08336da2d96cbc8c4b6b914dc99 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sun, 14 Feb 2016 12:10:12 -0800 Subject: IB/rdmavt: Add trace and error print statements in post_one_wr These trace and error print statements would help in debugging issues which are caused due to messed up QP ring buffer pointers. Reviewed-by: Mike Marciniszyn Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 522404a..d629911 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1441,7 +1441,12 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) /* check for avail */ if (unlikely(!qp->s_avail)) { qp->s_avail = qp_get_savail(qp); - WARN_ON(qp->s_avail > (qp->s_size - 1)); + if (WARN_ON(qp->s_avail > (qp->s_size - 1))) + rvt_pr_err(rdi, + "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u", + qp->ibqp.qp_num, qp->s_size, qp->s_avail, + qp->s_head, qp->s_tail, qp->s_cur, + qp->s_acked, qp->s_last); if (!qp->s_avail) return -ENOMEM; } @@ -1510,6 +1515,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) wqe->lpsn = wqe->psn + (wqe->length ? 
((wqe->length - 1) >> log_pmtu) : 0); qp->s_next_psn = wqe->lpsn + 1; + trace_rvt_post_one_wr(qp, wqe); smp_wmb(); /* see request builders */ qp->s_avail--; qp->s_head = next; diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index d5b1281..6c0457d 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -54,6 +54,7 @@ #include #include +#include #include #define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) @@ -108,6 +109,75 @@ DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, TP_PROTO(struct rvt_qp *qp, u32 bucket), TP_ARGS(qp, bucket)); +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_tx + +#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } +#define show_wr_opcode(opcode) \ +__print_symbolic(opcode, \ + wr_opcode_name(RDMA_WRITE), \ + wr_opcode_name(RDMA_WRITE_WITH_IMM), \ + wr_opcode_name(SEND), \ + wr_opcode_name(SEND_WITH_IMM), \ + wr_opcode_name(RDMA_READ), \ + wr_opcode_name(ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ + wr_opcode_name(LSO), \ + wr_opcode_name(SEND_WITH_INV), \ + wr_opcode_name(RDMA_READ_WITH_INV), \ + wr_opcode_name(LOCAL_INV), \ + wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) + +#define POS_PRN \ +"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" + +TRACE_EVENT( + rvt_post_one_wr, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), + TP_ARGS(qp, wqe), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, psn) + __field(u32, lpsn) + __field(u32, length) + __field(u32, opcode) + __field(u32, size) + __field(u32, avail) + __field(u32, head) + __field(u32, last) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->psn = wqe->psn; + __entry->lpsn = wqe->lpsn; + __entry->length = wqe->length; + __entry->opcode = wqe->wr.opcode; + __entry->size = qp->s_size; + __entry->avail = qp->s_avail; + __entry->head = qp->s_head; + __entry->last = qp->s_last; + ), + TP_printk( + POS_PRN, + __get_str(dev), + __entry->wr_id, + __entry->qpn, + __entry->psn, + __entry->lpsn, + __entry->length, + __entry->opcode, show_wr_opcode(__entry->opcode), + __entry->size, + __entry->avail, + __entry->head, + __entry->last + ) +); + #endif /* __RDMAVT_TRACE_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From 0ec79e875ada58bd6598d8965f9079fe1a662950 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sun, 14 Feb 2016 12:10:20 -0800 Subject: staging/rdma/hfi1: Put QPs into error state after SL->SC table changes If an SL->SC mapping table change occurs after an RC/UC QP is created, there is no mechanism to change the SC nor the VL for that QP. The fix is to place the QP into error state so that ULP can recreate the QP with the new SL->SC mapping. 
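For context (illustration only, not part of this patch): after hfi1_error_port_qps() forces a stale QP into the error state, the upper layer protocol sees the flush as a fatal QP error followed by an IB_EVENT_QP_LAST_WQE_REACHED event and is expected to tear down and recreate the QP, at which point the normal modify_qp sequence picks up the new SL->SC mapping. A minimal, hypothetical consumer-side sketch -- every my_* name below is invented for illustration and does not appear in this series -- could look like:

	/*
	 * Illustrative only.  A made-up ULP reacting to the error state
	 * forced by hfi1_error_port_qps(): destroy the QP and recreate it
	 * so the driver resolves SC/VL from the SL again.
	 */
	#include <linux/err.h>
	#include <linux/workqueue.h>
	#include <rdma/ib_verbs.h>

	struct my_conn {
		struct ib_pd *pd;
		struct ib_qp *qp;
		struct ib_qp_init_attr init_attr;	/* saved at setup time */
		struct work_struct recreate_work;
	};

	static void my_qp_event_handler(struct ib_event *event, void *context)
	{
		struct my_conn *conn = context;

		/* Raised once the flushed QP has completed its last WQE. */
		if (event->event == IB_EVENT_QP_LAST_WQE_REACHED)
			schedule_work(&conn->recreate_work);
	}

	static void my_recreate_qp(struct work_struct *work)
	{
		struct my_conn *conn = container_of(work, struct my_conn,
						    recreate_work);

		ib_destroy_qp(conn->qp);

		conn->init_attr.event_handler = my_qp_event_handler;
		conn->init_attr.qp_context = conn;
		conn->qp = ib_create_qp(conn->pd, &conn->init_attr);
		if (IS_ERR(conn->qp)) {
			conn->qp = NULL;
			return;
		}
		/*
		 * Re-run the usual INIT -> RTR -> RTS ib_modify_qp()
		 * transitions here; the new SL->SC mapping takes effect
		 * as part of those transitions.
		 */
	}

The recovery policy itself is up to the ULP; the patch below only guarantees that QPs carrying a remapped SL stop using the stale SC/VL.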
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 2fcc9f3..d9efe22 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -55,6 +55,7 @@ #include "hfi.h" #include "mad.h" #include "trace.h" +#include "qp.h" /* the reset value from the FM is supposed to be 0xffff, handle both */ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff @@ -1517,14 +1518,22 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; int i; + u8 sc; if (am) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } - for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++) - ibp->sl_to_sc[i] = *p++; + for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++) { + sc = *p++; + if (ibp->sl_to_sc[i] != sc) { + ibp->sl_to_sc[i] = sc; + + /* Put all stale qps into error state */ + hfi1_error_port_qps(ibp, i); + } + } return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len); } diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 00866c0..9e05314 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -840,3 +840,55 @@ void notify_error_qp(struct rvt_qp *qp) } } +/** + * hfi1_error_port_qps - put a port's RC/UC qps into error state + * @ibp: the ibport. + * @sl: the service level. + * + * This function places all RC/UC qps with a given service level into error + * state. It is generally called to force upper lay apps to abandon stale qps + * after an sl->sc mapping change. + */ +void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl) +{ + struct rvt_qp *qp = NULL; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_ibdev *dev = &ppd->dd->verbs_dev; + int n; + int lastwqe; + struct ib_event ev; + + rcu_read_lock(); + + /* Deal only with RC/UC qps that use the given SL. 
*/ + for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) { + for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) { + if (qp->port_num == ppd->port && + (qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_RC) && + qp->remote_ah_attr.sl == sl && + (ib_rvt_state_ops[qp->state] & + RVT_POST_SEND_OK)) { + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + lastwqe = rvt_error_qp(qp, + IB_WC_WR_FLUSH_ERR); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = + IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } + } + } + + rcu_read_unlock(); +} diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 98827b5..afc2b4d 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -158,4 +158,5 @@ void stop_send_queue(struct rvt_qp *qp); void quiesce_qp(struct rvt_qp *qp); u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); int mtu_to_path_mtu(u32 mtu); +void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl); #endif /* _QP_H */ -- cgit v0.10.2 From 90793f7179478df19ac4b2244cfd9764b28e4b38 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:29 -0800 Subject: IB/rdmavt: Clean up comments and add more documentation Add, remove, and otherwise clean up existing comments that are leftover from the initial code postings of rdmavt. Many of the comments were added to provide an idea on the direction we were thinking of going. Now that the design is solidified make a pass over and clean everything up. Also add details where lacking. Ensure all non static functions have nano comments. Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 9372c43..16c4461 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -53,6 +53,11 @@ * rvt_check_ah - validate the attributes of AH * @ibdev: the ib device * @ah_attr: the attributes of the AH + * + * If driver supports a more detailed check_ah function call back to it + * otherwise just check the basics. + * + * Return: 0 on success */ int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) @@ -95,6 +100,8 @@ EXPORT_SYMBOL(rvt_check_ah); * @ah_attr: the attributes of the AH * * This may be called from interrupt context. 
+ * + * Return: newly allocated ah */ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) @@ -129,6 +136,12 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, return &ah->ibah; } +/** + * rvt_destory_ah - Destory an address handle + * @ibah: address handle + * + * Return: 0 on success + */ int rvt_destroy_ah(struct ib_ah *ibah) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); @@ -147,6 +160,13 @@ int rvt_destroy_ah(struct ib_ah *ibah) return 0; } +/** + * rvt_modify_ah - modify an ah with given attrs + * @ibah: address handle to modify + * @ah_attr: attrs to apply + * + * Return: 0 on success + */ int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct rvt_ah *ah = ibah_to_rvtah(ibah); @@ -159,6 +179,13 @@ int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) return 0; } +/** + * rvt_query_ah - return attrs for ah + * @ibah: address handle to query + * @ah_attr: return info in this + * + * Return: always 0 + */ int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct rvt_ah *ah = ibah_to_rvtah(ibah); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 0e6dbe5..c69c070 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -173,10 +173,10 @@ static void send_complete(struct kthread_work *work) * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * - * Returns a pointer to the completion queue or negative errno values - * for failure. - * * Called by ib_create_cq() in the generic verbs code. + * + * Return: pointer to the completion queue or negative errno values + * for failure. */ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, @@ -286,9 +286,9 @@ done: * rvt_destroy_cq - destroy a completion queue * @ibcq: the completion queue to destroy. * - * Returns 0 for success. - * * Called by ib_destroy_cq() in the generic verbs code. + * + * Return: always 0 */ int rvt_destroy_cq(struct ib_cq *ibcq) { @@ -313,10 +313,10 @@ int rvt_destroy_cq(struct ib_cq *ibcq) * @ibcq: the completion queue * @notify_flags: the type of notification to request * - * Returns 0 for success. - * * This may be called from interrupt context. Also called by * ib_req_notify_cq() in the generic verbs code. + * + * Return: 0 for success. */ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { @@ -345,7 +345,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) * rvt_resize_cq - change the size of the CQ * @ibcq: the completion queue * - * Returns 0 for success. + * Return: 0 for success. */ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { @@ -456,10 +456,10 @@ bail_free: * @num_entries: the maximum number of entries to return * @entry: pointer to array where work completions are placed * - * Returns the number of completion entries polled. - * * This may be called from interrupt context. Also called by ib_poll_cq() * in the generic verbs code. + * + * Return: the number of completion entries polled. 
*/ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { @@ -496,6 +496,12 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) return npolled; } +/** + * rvt_driver_cq_init - Init cq resources on behalf of driver + * @rdi: rvt dev structure + * + * Return: 0 on success + */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { int ret = 0; @@ -530,6 +536,10 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) return ret; } +/** + * rvt_cq_exit - tear down cq reources + * @rdi: rvt dev structure + */ void rvt_cq_exit(struct rvt_dev_info *rdi) { struct kthread_worker *worker; diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 2feae47..f6e9977 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -59,14 +59,13 @@ * @in_mad: the incoming MAD * @out_mad: any outgoing MAD reply * - * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not - * interested in processing. - * * Note that the verbs framework has already done the MAD sanity checks, * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE * MADs. * * This is called by the ib_mad module. + * + * Return: IB_MAD_RESULT_SUCCESS or error */ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, @@ -75,13 +74,10 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 *out_mad_pkey_index) { /* - * Drivers will need to provide a number of things. For exmaple counters - * will need to be maintained by the driver but shoud live in the rvt - * structure. More study will be needed to finalize the interface - * between drivers and rvt for mad packets. - * - *VT-DRIVER-API: ???? - * + * MAD processing is quite different between hfi1 and qib. Therfore this + * is expected to be provided by the driver. Other drivers in the future + * may chose to implement this but it should not be made into a + * requirement. */ if (ibport_num_to_idx(ibdev, port_num) < 0) return -EINVAL; @@ -95,6 +91,14 @@ static void rvt_send_mad_handler(struct ib_mad_agent *agent, ib_free_send_mad(mad_send_wc->send_buf); } +/** + * rvt_create_mad_agents - create mad agents + * @rdi: rvt dev struct + * + * If driver needs to be notified of mad agent creation then call back + * + * Return 0 on success + */ int rvt_create_mad_agents(struct rvt_dev_info *rdi) { struct ib_mad_agent *agent; @@ -136,6 +140,12 @@ err: return ret; } +/** + * rvt_free_mad_agents - free up mad agents + * @rdi: rvt dev struct + * + * If driver needs notification of mad agent removal make the call back + */ void rvt_free_mad_agents(struct rvt_dev_info *rdi) { struct ib_mad_agent *agent; diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index e06a875..983d319 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -53,6 +53,12 @@ #include "mcast.h" +/** + * rvt_driver_mcast - init resources for multicast + * @rdi: rvt dev struct + * + * This is per device that registers with rdmavt + */ void rvt_driver_mcast_init(struct rvt_dev_info *rdi) { /* @@ -130,9 +136,9 @@ static void rvt_mcast_free(struct rvt_mcast *mcast) * @ibp: the IB port structure * @mgid: the multicast GID to search for * - * Returns NULL if not found. - * * The caller is responsible for decrementing the reference count if found. + * + * Return: NULL if not found. 
*/ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid) { @@ -170,7 +176,7 @@ EXPORT_SYMBOL(rvt_mcast_find); * @mcast: the mcast GID table * @mqp: the QP to attach * - * Return zero if both were added. Return EEXIST if the GID was already in + * Return: zero if both were added. Return EEXIST if the GID was already in * the table but the QP was added. Return ESRCH if the QP was already * attached and neither structure was added. */ @@ -247,6 +253,14 @@ bail: return ret; } +/** + * rvt_attach_mcast - attach a qp to a multicast group + * @ibqp: Infiniband qp + * @igd: multicast guid + * @lid: multicast lid + * + * Return: 0 on success + */ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); @@ -298,6 +312,14 @@ bail_mcast: return ret; } +/** + * rvt_detach_mcast - remove a qp from a multicast group + * @ibqp: Infiniband qp + * @igd: multicast guid + * @lid: multicast lid + * + * Return: 0 on success + */ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); @@ -377,6 +399,12 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return 0; } +/** + *rvt_mast_tree_empty - determine if any qps are attached to any mcast group + *@rdi: rvt dev struct + * + * Return: in use count + */ int rvt_mcast_tree_empty(struct rvt_dev_info *rdi) { int i; diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 49180c4..273974f 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -51,6 +51,10 @@ #include #include "mmap.h" +/** + * rvt_mmap_init - init link list and lock for mem map + * @rdi: rvt dev struct + */ void rvt_mmap_init(struct rvt_dev_info *rdi) { INIT_LIST_HEAD(&rdi->pending_mmaps); @@ -78,10 +82,6 @@ void rvt_release_mmap_info(struct kref *ref) } EXPORT_SYMBOL(rvt_release_mmap_info); -/* - * open and close keep track of how many times the CQ is mapped, - * to avoid releasing it. - */ static void rvt_vma_open(struct vm_area_struct *vma) { struct rvt_mmap_info *ip = vma->vm_private_data; @@ -105,7 +105,8 @@ static const struct vm_operations_struct rvt_vm_ops = { * rvt_mmap - create a new mmap region * @context: the IB user context of the process making the mmap() call * @vma: the VMA to be initialized - * Return zero if the mmap is OK. Otherwise, return an errno. + * + * Return: zero if the mmap is OK. Otherwise, return an errno. */ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { @@ -147,8 +148,14 @@ done: } EXPORT_SYMBOL(rvt_mmap); -/* - * Allocate information for hfi1_mmap +/** + * rvt_create_mmap_info - allocate information for hfi1_mmap + * @rdi: rvt dev struct + * @size: size in bytes to map + * @context: user context + * @obj: opaque pointer to a cq, wq etc + * + * Return: rvt_mmap struct on success */ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, @@ -180,6 +187,13 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, } EXPORT_SYMBOL(rvt_create_mmap_info); +/** + * rvt_update_mmap_info - update a mem map + * @rdi: rvt dev struct + * @ip: mmap info pointer + * @size: size to grow by + * @obj: opaque pointer to cq, wq, etc. 
+ */ void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj) { diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8bff6bb..8549652 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -52,8 +52,13 @@ #include "vt.h" #include "mr.h" -/* +/** + * rvt_driver_mr_init - Init MR resources per driver + * @rdi: rvt dev struct + * * Do any intilization needed when a driver registers with rdmavt. + * + * Return: 0 on success or errno on failure */ int rvt_driver_mr_init(struct rvt_dev_info *rdi) { @@ -98,7 +103,10 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) return 0; } -/* +/** + *rvt_mr_exit: clean up MR + *@rdi: rvt dev structure + * * called when drivers have unregistered or perhaps failed to register with us */ void rvt_mr_exit(struct rvt_dev_info *rdi) @@ -297,7 +305,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @pd: protection domain for this memory region * @acc: access flags * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. * Note that all DMA addresses should be created via the * struct ib_dma_mapping_ops functions (see dma.c). */ @@ -348,7 +356,7 @@ bail: * @mr_access_flags: access flags for this memory region * @udata: unused by the driver * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. */ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, @@ -418,10 +426,11 @@ bail_umem: * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free * - * Returns 0 on success. * * Note that this is called to free MRs created by rvt_get_dma_mr() * or rvt_reg_user_mr(). + * + * Returns 0 on success. */ int rvt_dereg_mr(struct ib_mr *ibmr) { @@ -456,7 +465,7 @@ out: * @mr_type: mem region type * @max_num_sg: Max number of segments allowed * - * Return the memory region on success, otherwise return an errno. + * Return: the memory region on success, otherwise return an errno. */ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -480,7 +489,7 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, * @mr_access_flags: access flags for this memory region * @fmr_attr: fast memory region attributes * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. */ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) @@ -537,6 +546,8 @@ bail: * @iova: the virtual address of the start of the fast memory region * * This may be called from interrupt context. + * + * Return: 0 on success */ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, @@ -580,7 +591,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, * rvt_unmap_fmr - unmap fast memory regions * @fmr_list: the list of fast memory regions to unmap * - * Returns 0 on success. + * Return: 0 on success. */ int rvt_unmap_fmr(struct list_head *fmr_list) { @@ -605,7 +616,7 @@ int rvt_unmap_fmr(struct list_head *fmr_list) * rvt_dealloc_fmr - deallocate a fast memory region * @ibfmr: the fast memory region to deallocate * - * Returns 0 on success. + * Return: 0 on success. 
*/ int rvt_dealloc_fmr(struct ib_fmr *ibfmr) { @@ -635,12 +646,13 @@ out: * @sge: SGE to check * @acc: access flags * - * Return 1 if valid and successful, otherwise returns 0. + * Check the IB SGE for validity and initialize our internal version + * of it. + * + * Return: 1 if valid and successful, otherwise returns 0. * * increments the reference count upon success * - * Check the IB SGE for validity and initialize our internal version - * of it. */ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc) @@ -733,7 +745,7 @@ EXPORT_SYMBOL(rvt_lkey_ok); * @rkey: rkey to check * @acc: access flags * - * Return 1 if successful, otherwise 0. + * Return: 1 if successful, otherwise 0. * * increments the reference count upon success */ diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index 62fee44..d1292f3 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -48,6 +48,16 @@ #include #include "pd.h" +/** + * rvt_alloc_pd - allocate a protection domain + * @ibdev: ib device + * @context: optional user context + * @udata: optional user data + * + * Allocate and keep track of a PD. + * + * Return: 0 on success + */ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -88,6 +98,12 @@ bail: return ret; } +/** + * rvt_dealloc_pd - Free PD + * @ibpd: Free up PD + * + * Return: always 0 + */ int rvt_dealloc_pd(struct ib_pd *ibpd) { struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d629911..5809562 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -162,6 +162,12 @@ static void free_qpn_table(struct rvt_qpn_table *qpt) free_page((unsigned long)qpt->map[i].page); } +/** + * rvt_driver_qp_init - Init driver qp resources + * @rdi: rvt dev strucutre + * + * Return: 0 on success + */ int rvt_driver_qp_init(struct rvt_dev_info *rdi) { int i; @@ -262,6 +268,12 @@ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) return qp_inuse; } +/** + * rvt_qp_exit - clean up qps on device exit + * @rdi: rvt dev structure + * + * Check for qp leaks and free resources. + */ void rvt_qp_exit(struct rvt_dev_info *rdi) { u32 qps_inuse = rvt_free_all_qps(rdi); @@ -483,7 +495,7 @@ EXPORT_SYMBOL(rvt_reset_qp); * unique idea of what queue pair numbers mean. For instance there is a reserved * range for PSM. * - * Returns the queue pair on success, otherwise returns an errno. + * Return: the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. */ @@ -757,6 +769,11 @@ bail_swq: return ret; } +/** + * rvt_clear_mr_refs - Drop help mr refs + * @qp: rvt qp data structure + * @clr_sends: If shoudl clear send side or not + */ void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; @@ -812,7 +829,8 @@ EXPORT_SYMBOL(rvt_clear_mr_refs); * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. + * + * Return: true if last WQE event should be generated. * The QP r_lock and s_lock should be held and interrupts disabled. * If we are already in error state, just return. 
*/ @@ -912,7 +930,11 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); } -/* +/** + * rvt_remove_qp - remove qp form table + * @rdi: rvt dev struct + * @qp: qp to remove + * * Remove the QP from the table so it can't be found asynchronously by * the receive routine. */ @@ -967,7 +989,7 @@ EXPORT_SYMBOL(rvt_remove_qp); * @attr_mask: the mask of attributes to modify * @udata: user data for libibverbs.so * - * Returns 0 on success, otherwise returns an errno. + * Return: 0 on success, otherwise returns an errno. */ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) @@ -1224,10 +1246,10 @@ inval: * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy * - * Returns 0 on success. - * * Note that this can be called while the QP is actively sending or * receiving! + * + * Return: 0 on success. */ int rvt_destroy_qp(struct ib_qp *ibqp) { @@ -1263,6 +1285,15 @@ int rvt_destroy_qp(struct ib_qp *ibqp) return 0; } +/** + * rvt_query_qp - query an ipbq + * @ibqp: IB qp to query + * @attr: attr struct to fill in + * @attr_mask: attr mask ignored + * @init_attr: struct to fill in + * + * Return: always 0 + */ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { @@ -1321,6 +1352,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. + * + * Return: 0 on success otherwise errno */ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) @@ -1539,6 +1572,8 @@ bail_inval_free: * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. + * + * Return: 0 on success else errno */ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) @@ -1594,6 +1629,8 @@ bail: * @bad_wr: A pointer to the first WR to cause a problem is put here * * This may be called from interrupt context. + * + * Return: 0 on success else errno */ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) @@ -1636,6 +1673,10 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, return 0; } +/** rvt_free_qpn - Free a qpn from the bit map + * @qpt: QP table + * @qpn: queue pair number to free + */ void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; @@ -1646,6 +1687,10 @@ void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) } EXPORT_SYMBOL(rvt_free_qpn); +/** + * rvt_dec_qp_cnt - decrement qp count + * rdi: rvt dev struct + */ void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) { spin_lock(&rdi->n_qps_lock); diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 4960a89..98c4927 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -51,7 +51,10 @@ #include "srq.h" -/* +/** + * rvt_driver_srq_init - init srq resources on a per driver basis + * @rdi: rvt dev structure + * * Do any initialization needed when a driver registers with rdmavt. 
*/ void rvt_driver_srq_init(struct rvt_dev_info *rdi) @@ -65,6 +68,8 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi) * @ibpd: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ + * + * Return: Allocated srq object */ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, @@ -168,6 +173,8 @@ bail_srq: * @attr: the new attributes of the SRQ * @attr_mask: indicates which attributes to modify * @udata: user data for libibverbs.so + * + * Return: 0 on success */ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, @@ -305,6 +312,12 @@ bail_free: return ret; } +/** rvt_query_srq - query srq data + * @ibsrq: srq to query + * @attr: return info in attr + * + * Return: always 0 + */ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); @@ -315,6 +328,12 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return 0; } +/** + * rvt_destroy_srq - destory an srq + * @ibsrq: srq object to destroy + * + * Return always 0 + */ int rvt_destroy_srq(struct ib_srq *ibsrq) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 2ccf610..f5cb09b 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -57,16 +57,37 @@ MODULE_DESCRIPTION("RDMA Verbs Transport Library"); static int rvt_init(void) { - /* Do any work needed prior to drivers calling for registration*/ + /* + * rdmavt does not need to do anything special when it starts up. All it + * needs to do is sit and wait until a driver attempts registration. + */ return 0; } module_init(rvt_init); static void rvt_cleanup(void) { + /* + * Nothing to do at exit time either. The module won't be able to be + * removed until all drivers are gone which means all the dev structs + * are gone so there is really nothing to do. + */ } module_exit(rvt_cleanup); +/** + * rvt_alloc_device - allocate rdi + * @size: how big of a structure to allocate + * @nports: number of ports to allocate array slots for + * + * Use IB core device alloc to allocate space for the rdi which is assumed to be + * inside of the ib_device. Any extra space that drivers require should be + * included in size. + * + * We also allocate a port array based on the number of ports. + * + * Return: pointer to allocated rdi + */ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) { struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM); @@ -105,15 +126,10 @@ static int rvt_modify_device(struct ib_device *device, struct ib_device_modify *device_modify) { /* - * Change dev props. Planned support is for node desc change and sys - * guid change only. This matches hfi1 and qib behavior. Other drivers - * that support existing modifications will need to add their support. + * There is currently no need to supply this based on qib and hfi1. + * Future drivers may need to implement this though. 
*/ - /* - * VT-DRIVER-API: node_desc_change() - * VT-DRIVER-API: sys_guid_change() - */ return -EOPNOTSUPP; } @@ -123,7 +139,7 @@ static int rvt_modify_device(struct ib_device *device, * @port_num: port number, 1 based from ib core * @props: structure to hold returned properties * - * Returns 0 on success + * Return: 0 on success */ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *props) @@ -158,7 +174,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, * @port_modify_mask: How to change the port * @props: Structure to fill in * - * Returns 0 on success + * Return: 0 on success */ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, int port_modify_mask, struct ib_port_modify *props) @@ -191,7 +207,7 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, * @port_num: Port number, 1 based from ib core * @intex: Index into pkey table * - * Returns 0 on failure pkey otherwise + * Return: 0 on failure pkey otherwise */ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey) @@ -223,7 +239,7 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, * @index: = Index in table * @gid: Gid to return * - * Returns 0 on success + * Return: 0 on success */ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, int guid_index, union ib_gid *gid) @@ -316,6 +332,15 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, #define CHECK_DRIVER_OVERRIDE(rdi, x) \ rdi->ibdev.x = rdi->ibdev.x ? : rvt_ ##x +/** + * rvt_register_device - register a driver + * @rdi: main dev structure for all of rdmavt operations + * + * It is up to drivers to allocate the rdi and fill in the appropriate + * information. + * + * Return: 0 on success otherwise an errno. + */ int rvt_register_device(struct rvt_dev_info *rdi) { /* Validate that drivers have provided the right information */ @@ -487,6 +512,10 @@ bail_no_mr: } EXPORT_SYMBOL(rvt_register_device); +/** + * rvt_unregister_device - remove a driver + * @rdi: rvt dev struct + */ void rvt_unregister_device(struct rvt_dev_info *rdi) { trace_rvt_dbg(rdi, "Driver is unregistering."); @@ -502,9 +531,16 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) } EXPORT_SYMBOL(rvt_unregister_device); -/* +/** + * rvt_init_port - init internal data for driver port + * @rdi: rvt dev strut + * @port: rvt port + * @port_index: 0 based index of ports, different from IB core port num + * * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away. 
+ * + * Return: always 0 */ int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int port_index, u16 *pkey_table) diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index aabd2e5..57c708d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -146,21 +146,6 @@ struct rvt_ibport { * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { - /* - * driver required fields: - * node_guid - * phys_port_cnt - * dma_device - * owner - * driver optional fields (rvt will provide generic value if blank): - * name - * node_desc - * rvt fields, driver value ignored: - * uverbs_abi_ver - * node_type - * num_comp_vectors - * uverbs_cmd_mask - */ struct ib_device_attr props; /* -- cgit v0.10.2 From 1348d706f254fe7030221251a5e1685ff3d9f86a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:37 -0800 Subject: IB/rdmavt: Add per verb driver callback checking For each verb validate that all requirements for driver callbacks are met. If a function is called without checking for a valid pointer, it is a required function. Also document what each callback function does. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index f5cb09b..9566a92 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -325,12 +325,375 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -/* - * Check driver override. If driver passes a value use it, otherwise we use our - * own value. - */ -#define CHECK_DRIVER_OVERRIDE(rdi, x) \ - rdi->ibdev.x = rdi->ibdev.x ? : rvt_ ##x +enum { + MISC, + QUERY_DEVICE, + MODIFY_DEVICE, + QUERY_PORT, + MODIFY_PORT, + QUERY_PKEY, + QUERY_GID, + ALLOC_UCONTEXT, + DEALLOC_UCONTEXT, + GET_PORT_IMMUTABLE, + CREATE_QP, + MODIFY_QP, + DESTROY_QP, + QUERY_QP, + POST_SEND, + POST_RECV, + POST_SRQ_RECV, + CREATE_AH, + DESTROY_AH, + MODIFY_AH, + QUERY_AH, + CREATE_SRQ, + MODIFY_SRQ, + DESTROY_SRQ, + QUERY_SRQ, + ATTACH_MCAST, + DETACH_MCAST, + GET_DMA_MR, + REG_USER_MR, + DEREG_MR, + ALLOC_MR, + ALLOC_FMR, + MAP_PHYS_FMR, + UNMAP_FMR, + DEALLOC_FMR, + MMAP, + CREATE_CQ, + DESTROY_CQ, + POLL_CQ, + REQ_NOTFIY_CQ, + RESIZE_CQ, + ALLOC_PD, + DEALLOC_PD, + _VERB_IDX_MAX /* Must always be last! */ +}; + +static inline int check_driver_override(struct rvt_dev_info *rdi, + size_t offset, void *func) +{ + if (!*(void **)((void *)&rdi->ibdev + offset)) { + *(void **)((void *)&rdi->ibdev + offset) = func; + return 0; + } + + return 1; +} + +static int check_support(struct rvt_dev_info *rdi, int verb) +{ + switch (verb) { + case MISC: + /* + * These functions are not part of verbs specifically but are + * required for rdmavt to function. + */ + if ((!rdi->driver_f.port_callback) || + (!rdi->driver_f.get_card_name) || + (!rdi->driver_f.get_pci_dev)) + return -EINVAL; + break; + + case QUERY_DEVICE: + check_driver_override(rdi, offsetof(struct ib_device, + query_device), + rvt_query_device); + break; + + case MODIFY_DEVICE: + /* + * rdmavt does not support modify device currently drivers must + * provide. 
+ */ + if (!check_driver_override(rdi, offsetof(struct ib_device, + modify_device), + rvt_modify_device)) + return -EOPNOTSUPP; + break; + + case QUERY_PORT: + if (!check_driver_override(rdi, offsetof(struct ib_device, + query_port), + rvt_query_port)) + if (!rdi->driver_f.query_port_state) + return -EINVAL; + break; + + case MODIFY_PORT: + if (!check_driver_override(rdi, offsetof(struct ib_device, + modify_port), + rvt_modify_port)) + if (!rdi->driver_f.cap_mask_chg || + !rdi->driver_f.shut_down_port) + return -EINVAL; + break; + + case QUERY_PKEY: + check_driver_override(rdi, offsetof(struct ib_device, + query_pkey), + rvt_query_pkey); + break; + + case QUERY_GID: + if (!check_driver_override(rdi, offsetof(struct ib_device, + query_gid), + rvt_query_gid)) + if (!rdi->driver_f.get_guid_be) + return -EINVAL; + break; + + case ALLOC_UCONTEXT: + check_driver_override(rdi, offsetof(struct ib_device, + alloc_ucontext), + rvt_alloc_ucontext); + break; + + case DEALLOC_UCONTEXT: + check_driver_override(rdi, offsetof(struct ib_device, + dealloc_ucontext), + rvt_dealloc_ucontext); + break; + + case GET_PORT_IMMUTABLE: + check_driver_override(rdi, offsetof(struct ib_device, + get_port_immutable), + rvt_get_port_immutable); + break; + + case CREATE_QP: + if (!check_driver_override(rdi, offsetof(struct ib_device, + create_qp), + rvt_create_qp)) + if (!rdi->driver_f.qp_priv_alloc || + !rdi->driver_f.qp_priv_free || + !rdi->driver_f.notify_qp_reset || + !rdi->driver_f.flush_qp_waiters || + !rdi->driver_f.stop_send_queue || + !rdi->driver_f.quiesce_qp) + return -EINVAL; + break; + + case MODIFY_QP: + if (!check_driver_override(rdi, offsetof(struct ib_device, + modify_qp), + rvt_modify_qp)) + if (!rdi->driver_f.notify_qp_reset || + !rdi->driver_f.schedule_send || + !rdi->driver_f.get_pmtu_from_attr || + !rdi->driver_f.flush_qp_waiters || + !rdi->driver_f.stop_send_queue || + !rdi->driver_f.quiesce_qp || + !rdi->driver_f.notify_error_qp || + !rdi->driver_f.mtu_from_qp || + !rdi->driver_f.mtu_to_path_mtu || + !rdi->driver_f.shut_down_port || + !rdi->driver_f.cap_mask_chg) + return -EINVAL; + break; + + case DESTROY_QP: + if (!check_driver_override(rdi, offsetof(struct ib_device, + destroy_qp), + rvt_destroy_qp)) + if (!rdi->driver_f.qp_priv_free || + !rdi->driver_f.notify_qp_reset || + !rdi->driver_f.flush_qp_waiters || + !rdi->driver_f.stop_send_queue || + !rdi->driver_f.quiesce_qp) + return -EINVAL; + break; + + case QUERY_QP: + check_driver_override(rdi, offsetof(struct ib_device, + query_qp), + rvt_query_qp); + break; + + case POST_SEND: + if (!check_driver_override(rdi, offsetof(struct ib_device, + post_send), + rvt_post_send)) + if (!rdi->driver_f.schedule_send || + !rdi->driver_f.do_send) + return -EINVAL; + break; + + case POST_RECV: + check_driver_override(rdi, offsetof(struct ib_device, + post_recv), + rvt_post_recv); + break; + case POST_SRQ_RECV: + check_driver_override(rdi, offsetof(struct ib_device, + post_srq_recv), + rvt_post_srq_recv); + break; + + case CREATE_AH: + check_driver_override(rdi, offsetof(struct ib_device, + create_ah), + rvt_create_ah); + break; + + case DESTROY_AH: + check_driver_override(rdi, offsetof(struct ib_device, + destroy_ah), + rvt_destroy_ah); + break; + + case MODIFY_AH: + check_driver_override(rdi, offsetof(struct ib_device, + modify_ah), + rvt_modify_ah); + break; + + case QUERY_AH: + check_driver_override(rdi, offsetof(struct ib_device, + query_ah), + rvt_query_ah); + break; + + case CREATE_SRQ: + check_driver_override(rdi, offsetof(struct ib_device, + 
create_srq), + rvt_create_srq); + break; + + case MODIFY_SRQ: + check_driver_override(rdi, offsetof(struct ib_device, + modify_srq), + rvt_modify_srq); + break; + + case DESTROY_SRQ: + check_driver_override(rdi, offsetof(struct ib_device, + destroy_srq), + rvt_destroy_srq); + break; + + case QUERY_SRQ: + check_driver_override(rdi, offsetof(struct ib_device, + query_srq), + rvt_query_srq); + break; + + case ATTACH_MCAST: + check_driver_override(rdi, offsetof(struct ib_device, + attach_mcast), + rvt_attach_mcast); + break; + + case DETACH_MCAST: + check_driver_override(rdi, offsetof(struct ib_device, + detach_mcast), + rvt_detach_mcast); + break; + + case GET_DMA_MR: + check_driver_override(rdi, offsetof(struct ib_device, + get_dma_mr), + rvt_get_dma_mr); + break; + + case REG_USER_MR: + check_driver_override(rdi, offsetof(struct ib_device, + reg_user_mr), + rvt_reg_user_mr); + break; + + case DEREG_MR: + check_driver_override(rdi, offsetof(struct ib_device, + dereg_mr), + rvt_dereg_mr); + break; + + case ALLOC_FMR: + check_driver_override(rdi, offsetof(struct ib_device, + alloc_fmr), + rvt_alloc_fmr); + break; + + case ALLOC_MR: + check_driver_override(rdi, offsetof(struct ib_device, + alloc_mr), + rvt_alloc_mr); + break; + + case MAP_PHYS_FMR: + check_driver_override(rdi, offsetof(struct ib_device, + map_phys_fmr), + rvt_map_phys_fmr); + break; + + case UNMAP_FMR: + check_driver_override(rdi, offsetof(struct ib_device, + unmap_fmr), + rvt_unmap_fmr); + break; + + case DEALLOC_FMR: + check_driver_override(rdi, offsetof(struct ib_device, + dealloc_fmr), + rvt_dealloc_fmr); + break; + + case MMAP: + check_driver_override(rdi, offsetof(struct ib_device, + mmap), + rvt_mmap); + break; + + case CREATE_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + create_cq), + rvt_create_cq); + break; + + case DESTROY_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + destroy_cq), + rvt_destroy_cq); + break; + + case POLL_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + poll_cq), + rvt_poll_cq); + break; + + case REQ_NOTFIY_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + req_notify_cq), + rvt_req_notify_cq); + break; + + case RESIZE_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + resize_cq), + rvt_resize_cq); + break; + + case ALLOC_PD: + check_driver_override(rdi, offsetof(struct ib_device, + alloc_pd), + rvt_alloc_pd); + break; + + case DEALLOC_PD: + check_driver_override(rdi, offsetof(struct ib_device, + dealloc_pd), + rvt_dealloc_pd); + break; + + default: + return -EINVAL; + } + + return 0; +} /** * rvt_register_device - register a driver @@ -343,35 +706,26 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, */ int rvt_register_device(struct rvt_dev_info *rdi) { - /* Validate that drivers have provided the right information */ - int ret = 0; + int ret = 0, i; if (!rdi) return -EINVAL; - if ((!rdi->driver_f.port_callback) || - (!rdi->driver_f.get_card_name) || - (!rdi->driver_f.get_pci_dev) || - (!rdi->driver_f.check_ah)) { - pr_err("Driver not supporting req func\n"); - return -EINVAL; - } + /* + * Check to ensure drivers have setup the required helpers for the verbs + * they want rdmavt to handle + */ + for (i = 0; i < _VERB_IDX_MAX; i++) + if (check_support(rdi, i)) { + pr_err("Driver support req not met at %d\n", i); + return -EINVAL; + } + /* Once we get past here we can use rvt_pr macros and tracepoints */ trace_rvt_dbg(rdi, "Driver attempting registration"); rvt_mmap_init(rdi); - /* Dev Ops */ - 
CHECK_DRIVER_OVERRIDE(rdi, query_device); - CHECK_DRIVER_OVERRIDE(rdi, modify_device); - CHECK_DRIVER_OVERRIDE(rdi, query_port); - CHECK_DRIVER_OVERRIDE(rdi, modify_port); - CHECK_DRIVER_OVERRIDE(rdi, query_pkey); - CHECK_DRIVER_OVERRIDE(rdi, query_gid); - CHECK_DRIVER_OVERRIDE(rdi, alloc_ucontext); - CHECK_DRIVER_OVERRIDE(rdi, dealloc_ucontext); - CHECK_DRIVER_OVERRIDE(rdi, get_port_immutable); - /* Queue Pairs */ ret = rvt_driver_qp_init(rdi); if (ret) { @@ -379,33 +733,15 @@ int rvt_register_device(struct rvt_dev_info *rdi) return -EINVAL; } - CHECK_DRIVER_OVERRIDE(rdi, create_qp); - CHECK_DRIVER_OVERRIDE(rdi, modify_qp); - CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); - CHECK_DRIVER_OVERRIDE(rdi, query_qp); - CHECK_DRIVER_OVERRIDE(rdi, post_send); - CHECK_DRIVER_OVERRIDE(rdi, post_recv); - CHECK_DRIVER_OVERRIDE(rdi, post_srq_recv); - /* Address Handle */ - CHECK_DRIVER_OVERRIDE(rdi, create_ah); - CHECK_DRIVER_OVERRIDE(rdi, destroy_ah); - CHECK_DRIVER_OVERRIDE(rdi, modify_ah); - CHECK_DRIVER_OVERRIDE(rdi, query_ah); spin_lock_init(&rdi->n_ahs_lock); rdi->n_ahs_allocated = 0; /* Shared Receive Queue */ - CHECK_DRIVER_OVERRIDE(rdi, create_srq); - CHECK_DRIVER_OVERRIDE(rdi, modify_srq); - CHECK_DRIVER_OVERRIDE(rdi, destroy_srq); - CHECK_DRIVER_OVERRIDE(rdi, query_srq); rvt_driver_srq_init(rdi); /* Multicast */ rvt_driver_mcast_init(rdi); - CHECK_DRIVER_OVERRIDE(rdi, attach_mcast); - CHECK_DRIVER_OVERRIDE(rdi, detach_mcast); /* Mem Region */ ret = rvt_driver_mr_init(rdi); @@ -414,35 +750,18 @@ int rvt_register_device(struct rvt_dev_info *rdi) goto bail_no_mr; } - CHECK_DRIVER_OVERRIDE(rdi, get_dma_mr); - CHECK_DRIVER_OVERRIDE(rdi, reg_user_mr); - CHECK_DRIVER_OVERRIDE(rdi, dereg_mr); - CHECK_DRIVER_OVERRIDE(rdi, alloc_mr); - CHECK_DRIVER_OVERRIDE(rdi, alloc_fmr); - CHECK_DRIVER_OVERRIDE(rdi, map_phys_fmr); - CHECK_DRIVER_OVERRIDE(rdi, unmap_fmr); - CHECK_DRIVER_OVERRIDE(rdi, dealloc_fmr); - CHECK_DRIVER_OVERRIDE(rdi, mmap); - /* Completion queues */ ret = rvt_driver_cq_init(rdi); if (ret) { pr_err("Error in driver CQ init.\n"); goto bail_mr; } - CHECK_DRIVER_OVERRIDE(rdi, create_cq); - CHECK_DRIVER_OVERRIDE(rdi, destroy_cq); - CHECK_DRIVER_OVERRIDE(rdi, poll_cq); - CHECK_DRIVER_OVERRIDE(rdi, req_notify_cq); - CHECK_DRIVER_OVERRIDE(rdi, resize_cq); /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; /* Protection Domain */ - CHECK_DRIVER_OVERRIDE(rdi, alloc_pd); - CHECK_DRIVER_OVERRIDE(rdi, dealloc_pd); spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 57c708d..ec658d8 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -208,62 +208,157 @@ struct rvt_dev_info; struct rvt_swqe; struct rvt_driver_provided { /* - * The work to create port files in /sys/class Infiniband is different - * depending on the driver. This should not be extracted away and - * instead drivers are responsible for setting the correct callback for - * this. + * Which functions are required depends on which verbs rdmavt is + * providing and which verbs the driver is overriding. See + * check_support() for details. */ - /* -------------------*/ - /* Required functions */ - /* -------------------*/ + /* Passed to ib core registration. Callback to create syfs files */ int (*port_callback)(struct ib_device *, u8, struct kobject *); + + /* + * Returns a string to represent the device for which is being + * registered. This is primarily used for error and debug messages on + * the console. 
+ */ const char * (*get_card_name)(struct rvt_dev_info *rdi); + + /* + * Returns a pointer to the undelying hardware's PCI device. This is + * used to display information as to what hardware is being referenced + * in an output message + */ struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + + /* + * Allocate a private queue pair data structure for driver specific + * information which is opaque to rdmavt. + */ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); + + /* + * Free the driver's private qp structure. + */ void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + + /* + * Inform the driver the particular qp in quesiton has been reset so + * that it can clean up anything it needs to. + */ void (*notify_qp_reset)(struct rvt_qp *qp); + + /* + * Give the driver a notice that there is send work to do. It is up to + * the driver to generally push the packets out, this just queues the + * work with the driver. There are two variants here. The no_lock + * version requires the s_lock not to be held. The other assumes the + * s_lock is held. + */ void (*schedule_send)(struct rvt_qp *qp); void (*schedule_send_no_lock)(struct rvt_qp *qp); + + /* + * Sometimes rdmavt needs to kick the driver's send progress. That is + * done by this call back. + */ void (*do_send)(struct rvt_qp *qp); + + /* + * Get a path mtu from the driver based on qp attributes. + */ int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); + + /* + * Notify driver that it needs to flush any outstanding IO requests that + * are waiting on a qp. + */ void (*flush_qp_waiters)(struct rvt_qp *qp); + + /* + * Notify driver to stop its queue of sending packets. Nothing else + * should be posted to the queue pair after this has been called. + */ void (*stop_send_queue)(struct rvt_qp *qp); + + /* + * Have the drivr drain any in progress operations + */ void (*quiesce_qp)(struct rvt_qp *qp); + + /* + * Inform the driver a qp has went to error state. + */ void (*notify_error_qp)(struct rvt_qp *qp); + + /* + * Get an MTU for a qp. + */ u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); + /* + * Convert an mtu to a path mtu + */ int (*mtu_to_path_mtu)(u32 mtu); + + /* + * Get the guid of a port in big endian byte order + */ int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid); + + /* + * Query driver for the state of the port. + */ int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, struct ib_port_attr *props); + + /* + * Tell driver to shutdown a port + */ int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + + /* Tell driver to send a trap for changed port capabilities */ void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); - /*--------------------*/ - /* Optional functions */ - /*--------------------*/ + /* + * The following functions can be safely ignored completely. Any use of + * these is checked for NULL before blindly calling. Rdmavt should also + * be functional if drivers omit these. + */ + + /* Called to inform the driver that all qps should now be freed. 
*/ + unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + + /* Driver specific AH validation */ int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + + /* Inform the driver a new AH has been created */ void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); + + /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port_num, gfp_t gfp); - /** - * Return 0 if modification is valid, -errno otherwise - */ + + /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + + /* Driver specific QP modification/notification-of */ void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + /* Driver specific work request checking */ int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); + /* Notify driver a mad agent has been created */ void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + + /* Notify driver a mad agent has been removed */ void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + }; struct rvt_dev_info { -- cgit v0.10.2 From 611ac099289736176e16aedd75f980f21b00d974 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:45 -0800 Subject: IB/qib: Setup notify free/create mad agent callbacks for rdmavt Qib needs to be notified when mad agents are created and freed, there is some counter maintenance that needs to be performed. Add those callbacks at registration time with rdmavt. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 5cf019f..cbf6200 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1682,6 +1682,10 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port; dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port; dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg; + dd->verbs_dev.rdi.driver_f.notify_create_mad_agent = + qib_notify_create_mad_agent; + dd->verbs_dev.rdi.driver_f.notify_free_mad_agent = + qib_notify_free_mad_agent; dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC; dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be; -- cgit v0.10.2 From 4eadd8ff21bffcaf3338267dcca571accc612c8e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:55 -0800 Subject: IB/qib,rdmavt: Move smi_ah to qib Rdmavt adopted an smi_ah from qib which is not needed by hfi1. Move this back to qib and get it out of the common library. 
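To make the end state concrete, the cached SMI address handle now lives in qib's own per-port structure rather than in the shared rvt_ibport, and it is still created lazily on first use. The sketch below is only an illustration pieced together from the hunks in this patch; all field and function names are taken from the diff, and the surrounding code of try_7322_ipg() is elided:

struct qib_ibport {
	struct rvt_ibport rvp;	/* common rdmavt per-port state */
	struct rvt_ah *smi_ah;	/* qib-private again after this patch */
	/* ... remaining qib-specific fields ... */
};

	/* lazy creation on first use, as in try_7322_ipg() */
	if (!ibp->smi_ah) {
		struct ib_ah *ah;

		ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE));
		if (IS_ERR(ah))
			ret = PTR_ERR(ah);
		else {
			send_buf->ah = ah;
			ibp->smi_ah = ibah_to_rvtah(ah);
			ret = 0;
		}
	} else {
		send_buf->ah = &ibp->smi_ah->ibah;
		ret = 0;
	}
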
Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index a159922..ca28c19 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2910,8 +2910,8 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data); } - if (dd->pport[i].ibport_data.rvp.smi_ah) - ib_destroy_ah(&dd->pport[i].ibport_data.rvp.smi_ah->ibah); + if (dd->pport[i].ibport_data.smi_ah) + ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah); } } @@ -5507,7 +5507,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd) if (IS_ERR(send_buf)) goto retry; - if (!ibp->rvp.smi_ah) { + if (!ibp->smi_ah) { struct ib_ah *ah; ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); @@ -5515,11 +5515,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd) ret = PTR_ERR(ah); else { send_buf->ah = ah; - ibp->rvp.smi_ah = ibah_to_rvtah(ah); + ibp->smi_ah = ibah_to_rvtah(ah); ret = 0; } } else { - send_buf->ah = &ibp->rvp.smi_ah->ibah; + send_buf->ah = &ibp->smi_ah->ibah; ret = 0; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index d137d71..4b76a8d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -219,6 +219,7 @@ struct qib_ibport { struct rvt_ibport rvp; + struct rvt_ah *smi_ah; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ struct qib_pma_counters __percpu *pmastats; u64 z_unicast_xmit; /* starting count for PMA */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index ec658d8..2c3a04c 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -137,7 +137,6 @@ struct rvt_ibport { u16 *pkey_table; struct rvt_ah *sm_ah; - struct rvt_ah *smi_ah; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ -- cgit v0.10.2 From ce73fe25a6bd4a4deabed57e2553100e10fb8362 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:03 -0800 Subject: IB/rdmavt: Remove RVT_FLAGs While hfi1 and qib were still supporting bits and pieces of core verbs components there needed to be a way to convey whether rdmavt should handle allocation and initialization of resources like the queue pair table. Now that all of this is moved into rdmavt there is no need for these flags. They are no longer used in the drivers.
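For context, the guards being deleted all followed the same pattern; the snippet below is reconstructed from the hunks removed later in this patch and shows the old behaviour that is going away, not new code:

/* include/rdma/rdma_vt.h -- flags no driver sets any more */
#define RVT_FLAG_MR_INIT_DRIVER BIT(1)
#define RVT_FLAG_QP_INIT_DRIVER BIT(2)
#define RVT_FLAG_CQ_INIT_DRIVER BIT(3)

/* early-out at the top of rvt_driver_qp_init(), with similar checks in the MR and CQ init paths */
if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) {
	rvt_pr_info(rdi, "Driver is doing QP init.\n");
	return 0;
}

Since neither qib nor hfi1 sets these bits, every init path now unconditionally runs the common rdmavt code.
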
Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index c69c070..b1ffc8b 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -508,11 +508,6 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) int cpu; struct task_struct *task; - if (rdi->flags & RVT_FLAG_CQ_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing CQ init.\n"); - return 0; - } - if (rdi->worker) return 0; rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8549652..4d5ef73 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -66,11 +66,6 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) unsigned lk_tab_size; int i; - if (rdi->flags & RVT_FLAG_MR_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing MR init.\n"); - return 0; - } - /* * The top hfi1_lkey_table_size bits are used to index the * table. The lower 8 bits can be owned by the user (copied from diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 5809562..441e17a 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -173,11 +173,6 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) int i; int ret = -ENOMEM; - if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing QP init.\n"); - return 0; - } - if (!rdi->dparms.qp_table_size) return -EINVAL; @@ -284,9 +279,6 @@ void rvt_qp_exit(struct rvt_dev_info *rdi) if (!rdi->qp_dev) return; - if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) - return; /* driver did the qp init so nothing else to do */ - kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); kfree(rdi->qp_dev); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2c3a04c..264c514 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -60,16 +60,6 @@ #include #include -/* - * For some of the IBTA objects there will likely be some - * initializations required. We need flags to determine whether it is OK - * for rdmavt to do this or not. This does not imply any functions of a - * partiuclar IBTA object are overridden. - */ -#define RVT_FLAG_MR_INIT_DRIVER BIT(1) -#define RVT_FLAG_QP_INIT_DRIVER BIT(2) -#define RVT_FLAG_CQ_INIT_DRIVER BIT(3) - #define RVT_MAX_PKEY_VALUES 16 struct rvt_ibport { -- cgit v0.10.2 From 0765b01b8e2da50ad56f6544f935f5eaef1389f2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:12 -0800 Subject: IB/rdmavt: Remove signal_supported and comments Initially it was intended that rdmavt would support some signaling between the underlying driver and itself. However this turned out to be unnecessary for qib and hfi1. If we need to add something like this in later to support another driver we should do it then. As of now this essentially dead code so remove it. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 264c514..4c50bbb 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -138,24 +138,6 @@ struct rvt_driver_params { struct ib_device_attr props; /* - * Drivers will need to support a number of notifications to rvt in - * accordance with certain events. This structure should contain a mask - * of the supported events. 
Such events that the rvt may need to know - * about include: - * port errors - * port active - * lid change - * sm change - * client reregister - * pkey change - * - * There may also be other events that the rvt layers needs to know - * about this is not an exhaustive list. Some events though rvt does not - * need to rely on the driver for such as completion queue error. - */ - int rvt_signal_supported; - - /* * Anything driver specific that is not covered by props * For instance special module parameters. Goes here. */ -- cgit v0.10.2 From 79a225be38932b17707009767e85d6edf450e7cc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:20 -0800 Subject: IB/rdmavt: Remove unnecessary exported functions Remove exported functions which are no longer required as the functionality has moved into rdmavt. This also requires re-ordering some of the functions since their prototype no longer appears in a header file. Rather than add forward declarations it is just cleaner to re-order some of the functions. Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 273974f..e202b81 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -80,7 +80,6 @@ void rvt_release_mmap_info(struct kref *ref) vfree(ip->obj); kfree(ip); } -EXPORT_SYMBOL(rvt_release_mmap_info); static void rvt_vma_open(struct vm_area_struct *vma) { @@ -146,7 +145,6 @@ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) done: return ret; } -EXPORT_SYMBOL(rvt_mmap); /** * rvt_create_mmap_info - allocate information for hfi1_mmap @@ -185,7 +183,6 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, return ip; } -EXPORT_SYMBOL(rvt_create_mmap_info); /** * rvt_update_mmap_info - update a mem map @@ -209,4 +206,3 @@ void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, ip->size = size; ip->obj = obj; } -EXPORT_SYMBOL(rvt_update_mmap_info); diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index e806747..fab0e7b 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -51,5 +51,13 @@ #include void rvt_mmap_init(struct rvt_dev_info *rdi); +void rvt_release_mmap_info(struct kref *ref); +int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, + u32 size, + struct ib_ucontext *context, + void *obj); +void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, + u32 size, void *obj); #endif /* DEF_RDMAVTMMAP_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 441e17a..dbf124d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -390,12 +390,116 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) } /** + * rvt_clear_mr_refs - Drop help mr refs + * @qp: rvt qp data structure + * @clr_sends: If shoudl clear send side or not + */ +static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) +{ + unsigned n; + + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) + rvt_put_ss(&qp->s_rdma_read_sge); + + rvt_put_ss(&qp->r_sge); + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct rvt_sge *sge = &wqe->sg_list[i]; + + 
rvt_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&ibah_to_rvtah( + wqe->ud_wr.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + smp_wmb(); /* see qp_set_savail */ + } + if (qp->s_rdma_mr) { + rvt_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct rvt_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + } +} + +/** + * rvt_remove_qp - remove qp form table + * @rdi: rvt dev struct + * @qp: qp to remove + * + * Remove the QP from the table so it can't be found asynchronously by + * the receive routine. + */ +static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + unsigned long flags; + int removed = 1; + + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (rcu_dereference_protected(rvp->qp[0], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[0], NULL); + } else if (rcu_dereference_protected(rvp->qp[1], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[1], NULL); + } else { + struct rvt_qp *q; + struct rvt_qp __rcu **qpp; + + removed = 0; + qpp = &rdi->qp_dev->qp_table[n]; + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; + qpp = &q->next) { + if (q == qp) { + RCU_INIT_POINTER(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&rdi->qp_dev->qpt_lock))); + removed = 1; + trace_rvt_qpremove(qp, n); + break; + } + } + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); + if (removed) { + synchronize_rcu(); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + +/** * reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type * r and s lock are required to be held by the caller */ -void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) { if (qp->state != IB_QPS_RESET) { @@ -475,7 +579,6 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } qp->r_sge.num_sge = 0; } -EXPORT_SYMBOL(rvt_reset_qp); /** * rvt_create_qp - create a queue pair for a device @@ -762,60 +865,6 @@ bail_swq: } /** - * rvt_clear_mr_refs - Drop help mr refs - * @qp: rvt qp data structure - * @clr_sends: If shoudl clear send side or not - */ -void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) -{ - unsigned n; - - if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - rvt_put_ss(&qp->s_rdma_read_sge); - - rvt_put_ss(&qp->r_sge); - - if (clr_sends) { - while (qp->s_last != qp->s_head) { - struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - unsigned i; - - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&ibah_to_rvtah( - wqe->ud_wr.ah)->refcount); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - smp_wmb(); /* see qp_set_savail */ - } - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - } - - if (qp->ibqp.qp_type != 
IB_QPT_RC) - return; - - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { - struct rvt_ack_entry *e = &qp->s_ack_queue[n]; - - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - } -} -EXPORT_SYMBOL(rvt_clear_mr_refs); - -/** * rvt_error_qp - put a QP into the error state * @qp: the QP to put into the error state * @err: the receive completion error to signal if a RWQE is active @@ -923,58 +972,6 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) } /** - * rvt_remove_qp - remove qp form table - * @rdi: rvt dev struct - * @qp: qp to remove - * - * Remove the QP from the table so it can't be found asynchronously by - * the receive routine. - */ -void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) -{ - struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; - u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); - unsigned long flags; - int removed = 1; - - spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); - - if (rcu_dereference_protected(rvp->qp[0], - lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(rvp->qp[0], NULL); - } else if (rcu_dereference_protected(rvp->qp[1], - lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(rvp->qp[1], NULL); - } else { - struct rvt_qp *q; - struct rvt_qp __rcu **qpp; - - removed = 0; - qpp = &rdi->qp_dev->qp_table[n]; - for (; (q = rcu_dereference_protected(*qpp, - lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; - qpp = &q->next) { - if (q == qp) { - RCU_INIT_POINTER(*qpp, - rcu_dereference_protected(qp->next, - lockdep_is_held(&rdi->qp_dev->qpt_lock))); - removed = 1; - trace_rvt_qpremove(qp, n); - break; - } - } - } - - spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); - if (removed) { - synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } -} -EXPORT_SYMBOL(rvt_remove_qp); - -/** * qib_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes @@ -1234,6 +1231,19 @@ inval: return -EINVAL; } +/** rvt_free_qpn - Free a qpn from the bit map + * @qpt: QP table + * @qpn: queue pair number to free + */ +static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} + /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy @@ -1664,29 +1674,3 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } return 0; } - -/** rvt_free_qpn - Free a qpn from the bit map - * @qpt: QP table - * @qpn: queue pair number to free - */ -void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} -EXPORT_SYMBOL(rvt_free_qpn); - -/** - * rvt_dec_qp_cnt - decrement qp count - * rdi: rvt dev struct - */ -void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) -{ - spin_lock(&rdi->n_qps_lock); - rdi->n_qps_allocated--; - spin_unlock(&rdi->n_qps_lock); -} -EXPORT_SYMBOL(rvt_dec_qp_cnt); diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 98c4927..f7c48e90 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -50,6 +50,7 @@ #include #include "srq.h" +#include "vt.h" /** * rvt_driver_srq_init - init srq resources on a per 
driver basis diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index e26f9e9..6b01eaa 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -60,6 +60,7 @@ #include "mmap.h" #include "cq.h" #include "mad.h" +#include "mmap.h" #define rvt_pr_info(rdi, fmt, ...) \ __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4c50bbb..a869655 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -476,19 +476,6 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); -int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -void rvt_release_mmap_info(struct kref *ref); -struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, - u32 size, - struct ib_ucontext *context, - void *obj); -void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, - u32 size, void *obj); -int rvt_reg_mr(struct rvt_qp *qp, struct ib_reg_wr *wr); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); -/* Temporary export */ -void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type); - #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1066b5d..933f14f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -438,10 +438,6 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) extern const int ib_rvt_state_ops[]; struct rvt_dev_info; -void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp); -void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends); int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); -void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn); -void rvt_dec_qp_cnt(struct rvt_dev_info *rdi); #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v0.10.2 From 21cfca33587d45ccdc5aaaedf97a909ccc4a0a27 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sun, 14 Feb 2016 12:11:28 -0800 Subject: IB/qib: Destroy SMI AH before de-allocating the protection domain If SMI AH is not destroyed before de-allocating the PD, it would result in non-zero PD use count when de-allocating the PD, triggering a WARN_ON() at drivers/infiniband/core/verbs.c:284 ib_dealloc_pd+0x69/0xb0 [ib_core]() when unloading the qib driver on systems with dual-port card. This problem has always been there in qib and was detected only after the commit 7dd78647a2c2 ("IB/core: Make ib_dealloc_pd return void") introduced a WARN_ON in ib_dealloc_pd() that triggers if a PD's use count is non-zero before de-allocating the PD. Below is the call trace from the dmesg log. 
[ 7264.966129] Call Trace: [ 7264.969652] [] dump_stack+0x44/0x64 [ 7264.976181] [] warn_slowpath_common+0x86/0xc0 [ 7264.983656] [] warn_slowpath_null+0x1a/0x20 [ 7264.990961] [] ib_dealloc_pd+0x69/0xb0 [ib_core] [ 7264.998717] [] ib_mad_port_close+0xb8/0x120 [ib_mad] [ 7265.006866] [] ib_mad_remove_device+0x6f/0xc0 [ib_mad] [ 7265.015224] [] ib_unregister_device+0xa7/0x140 [ib_core] [ 7265.023738] [] rvt_unregister_device+0x29/0x80 [rdmavt] [ 7265.032181] [] qib_unregister_ib_device+0x22/0x210 [ib_qib] [ 7265.040993] [] qib_remove_one+0x1f/0x250 [ib_qib] [ 7265.048823] [] pci_device_remove+0x39/0xc0 [ 7265.055984] [] __device_release_driver+0x9a/0x140 [ 7265.063821] [] driver_detach+0xb8/0xc0 [ 7265.070579] [] bus_remove_driver+0x55/0xd0 [ 7265.077717] [] driver_unregister+0x2c/0x50 [ 7265.084849] [] pci_unregister_driver+0x2a/0x80 [ 7265.092366] [] qib_ib_cleanup+0x37/0x65 [ib_qib] [ 7265.100068] [] SyS_delete_module+0x190/0x220 [ 7265.107379] [] entry_SYSCALL_64_fastpath+0x12/0x71 Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ca28c19..82d7c4b 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2910,8 +2910,6 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data); } - if (dd->pport[i].ibport_data.smi_ah) - ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah); } } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 73ca2c2..0bd1837 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2496,4 +2496,7 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) if (dd->pport[port_idx].cong_stats.timer.data) del_timer_sync(&dd->pport[port_idx].cong_stats.timer); + + if (dd->pport[port_idx].ibport_data.smi_ah) + ib_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah); } -- cgit v0.10.2 From 3585254d56b0c474a50f911295710e786b33d9ca Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:44:17 -0800 Subject: staging/rdma/hfi1: add s_avail to qp_stats This diagnostic capability was missed in the dual lock series. Signed-off-by: Vennila Megavannan Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 9e05314..9846cd6 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -612,7 +612,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) sde = qp_to_sdma_engine(qp, priv->s_sc); wqe = rvt_get_swqe_ptr(qp, qp->s_last); seq_printf(s, - "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u %u SDE %p,%u\n", + "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u %u SDE %p,%u\n", iter->n, qp_idle(qp) ? 
"I" : "B", qp->ibqp.qp_num, @@ -632,6 +632,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_sending_psn, qp->s_sending_hpsn, qp->s_last, qp->s_acked, qp->s_cur, qp->s_tail, qp->s_head, qp->s_size, + qp->s_avail, qp->remote_qpn, qp->remote_ah_attr.dlid, qp->remote_ah_attr.sl, -- cgit v0.10.2 From d2421a82f6d8ad407d3f4acdbacedfb06d9f47f5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:44:26 -0800 Subject: IB/rdmvt: close send engine struct holes pahole noted the wasted 4 bytes after s_lock and r_lock. Move s_flags and r_psn to fill the holes. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 933f14f..5c307ed 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -283,12 +283,12 @@ struct rvt_qp { struct rvt_sge_state s_rdma_read_sge; spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + u32 r_psn; /* expected rcv packet sequence number */ unsigned long r_aflags; u64 r_wr_id; /* ID for current receive WQE */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ u32 r_msn; /* message sequence number */ u8 r_state; /* opcode of last packet received */ @@ -308,8 +308,8 @@ struct rvt_qp { u32 s_ssn; /* SSN of tail entry */ spinlock_t s_lock ____cacheline_aligned_in_smp; - struct rvt_sge_state *s_cur_sge; u32 s_flags; + struct rvt_sge_state *s_cur_sge; struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; -- cgit v0.10.2 From 45842abbb292338d7d328c40bae411218242d2cd Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:44:34 -0800 Subject: staging/rdma/hfi1: move txreq header code The patch separates the txreq defines into new files, one for verbs and one for sdma. The verbs_txreq implementation handles the setup and teardown of the txreq cache, so the register routine is changed to call the new init/exit routines. This patch allows for followup patches enhance the send engine. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 6681b74..9b11706 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -11,7 +11,8 @@ hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \ eprom.o file_ops.o firmware.o \ init.o intr.o mad.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ - uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o + uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ + verbs_txreq.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o CFLAGS_trace.o = -I$(src) diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 9846cd6..7387ef5 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -60,7 +60,7 @@ #include "hfi.h" #include "qp.h" #include "trace.h" -#include "sdma.h" +#include "verbs_txreq.h" unsigned int hfi1_qp_table_size = 256; module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO); diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 6114550..7c6feff 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -53,7 +53,7 @@ #include "hfi.h" #include "mad.h" #include "qp.h" -#include "sdma.h" +#include "verbs_txreq.h" /* * Convert the AETH RNR timeout code into the number of microseconds. diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 1d52d6e..76ed215 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -58,9 +58,8 @@ #include "hfi.h" #include "verbs.h" +#include "sdma_txreq.h" -/* increased for AHG */ -#define NUM_DESC 6 /* Hardware limit */ #define MAX_DESC 64 /* Hardware limit for SDMA packet size */ @@ -311,83 +310,6 @@ struct hw_sdma_desc { __le64 qw[2]; }; -/* - * struct sdma_desc - canonical fragment descriptor - * - * This is the descriptor carried in the tx request - * corresponding to each fragment. - * - */ -struct sdma_desc { - /* private: don't use directly */ - u64 qw[2]; -}; - -struct sdma_txreq; -typedef void (*callback_t)(struct sdma_txreq *, int, int); - -/** - * struct sdma_txreq - the sdma_txreq structure (one per packet) - * @list: for use by user and by queuing for wait - * - * This is the representation of a packet which consists of some - * number of fragments. Storage is provided to within the structure. - * for all fragments. - * - * The storage for the descriptors are automatically extended as needed - * when the currently allocation is exceeded. - * - * The user (Verbs or PSM) may overload this structure with fields - * specific to their use by putting this struct first in their struct. - * The method of allocation of the overloaded structure is user dependent - * - * The list is the only public field in the structure. 
- * - */ - -struct sdma_txreq { - struct list_head list; - /* private: */ - struct sdma_desc *descp; - /* private: */ - void *coalesce_buf; - /* private: */ - u16 coalesce_idx; - /* private: */ - struct iowait *wait; - /* private: */ - callback_t complete; -#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER - u64 sn; -#endif - /* private: - used in coalesce/pad processing */ - u16 packet_len; - /* private: - down-counted to trigger last */ - u16 tlen; - /* private: flags */ - u16 flags; - /* private: */ - u16 num_desc; - /* private: */ - u16 desc_limit; - /* private: */ - u16 next_descq_idx; - /* private: */ - struct sdma_desc descs[NUM_DESC]; -}; - -struct verbs_txreq { - struct hfi1_pio_header phdr; - struct sdma_txreq txreq; - struct rvt_qp *qp; - struct rvt_swqe *wqe; - struct rvt_mregion *mr; - struct rvt_sge_state *ss; - struct sdma_engine *sde; - u16 hdr_dwords; - u16 hdr_inx; -}; - /** * struct sdma_engine - Data pertaining to each SDMA engine. * @dd: a back-pointer to the device data diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/staging/rdma/hfi1/sdma_txreq.h new file mode 100644 index 0000000..d0f77a8 --- /dev/null +++ b/drivers/staging/rdma/hfi1/sdma_txreq.h @@ -0,0 +1,130 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef HFI1_SDMA_TXREQ_H +#define HFI1_SDMA_TXREQ_H + +/* increased for AHG */ +#define NUM_DESC 6 + +/* + * struct sdma_desc - canonical fragment descriptor + * + * This is the descriptor carried in the tx request + * corresponding to each fragment. + * + */ +struct sdma_desc { + /* private: don't use directly */ + u64 qw[2]; +}; + +/** + * struct sdma_txreq - the sdma_txreq structure (one per packet) + * @list: for use by user and by queuing for wait + * + * This is the representation of a packet which consists of some + * number of fragments. Storage is provided to within the structure. + * for all fragments. + * + * The storage for the descriptors are automatically extended as needed + * when the currently allocation is exceeded. + * + * The user (Verbs or PSM) may overload this structure with fields + * specific to their use by putting this struct first in their struct. + * The method of allocation of the overloaded structure is user dependent + * + * The list is the only public field in the structure. + * + */ + +#define SDMA_TXREQ_S_OK 0 +#define SDMA_TXREQ_S_SENDERROR 1 +#define SDMA_TXREQ_S_ABORTED 2 +#define SDMA_TXREQ_S_SHUTDOWN 3 + +/* flags bits */ +#define SDMA_TXREQ_F_URGENT 0x0001 +#define SDMA_TXREQ_F_AHG_COPY 0x0002 +#define SDMA_TXREQ_F_USE_AHG 0x0004 + +struct sdma_txreq; +typedef void (*callback_t)(struct sdma_txreq *, int, int); + +struct iowait; +struct sdma_txreq { + struct list_head list; + /* private: */ + struct sdma_desc *descp; + /* private: */ + void *coalesce_buf; + /* private: */ + struct iowait *wait; + /* private: */ + callback_t complete; +#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER + u64 sn; +#endif + /* private: - used in coalesce/pad processing */ + u16 packet_len; + /* private: - down-counted to trigger last */ + u16 tlen; + /* private: */ + u16 num_desc; + /* private: */ + u16 desc_limit; + /* private: */ + u16 next_descq_idx; + /* private: */ + u16 coalesce_idx; + /* private: flags */ + u16 flags; + /* private: */ + struct sdma_desc descs[NUM_DESC]; +}; + +#endif /* HFI1_SDMA_TXREQ_H */ diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 1df4648..7838b21 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -63,7 +63,7 @@ #include "device.h" #include "trace.h" #include "qp.h" -#include "sdma.h" +#include "verbs_txreq.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -508,89 +508,6 @@ void update_sge(struct rvt_sge_state *ss, u32 length) } } -static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, - struct rvt_qp *qp) -{ - struct hfi1_qp_priv *priv = qp->priv; - struct verbs_txreq *tx; - unsigned long flags; - - tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); - if (!tx) { - spin_lock_irqsave(&qp->s_lock, flags); - write_seqlock(&dev->iowait_lock); - if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK && - list_empty(&priv->s_iowait.list)) { - dev->n_txwait++; - qp->s_flags |= RVT_S_WAIT_TX; - list_add_tail(&priv->s_iowait.list, &dev->txwait); - trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); - atomic_inc(&qp->refcount); - } - qp->s_flags &= ~RVT_S_BUSY; - write_sequnlock(&dev->iowait_lock); - spin_unlock_irqrestore(&qp->s_lock, flags); - tx = ERR_PTR(-EBUSY); - } - return tx; -} - -static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, - struct rvt_qp *qp) -{ - struct verbs_txreq *tx; - - tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); - if (!tx) { - /* call slow 
path to get the lock */ - tx = __get_txreq(dev, qp); - if (IS_ERR(tx)) - return tx; - } - tx->qp = qp; - return tx; -} - -void hfi1_put_txreq(struct verbs_txreq *tx) -{ - struct hfi1_ibdev *dev; - struct rvt_qp *qp; - unsigned long flags; - unsigned int seq; - struct hfi1_qp_priv *priv; - - qp = tx->qp; - dev = to_idev(qp->ibqp.device); - - if (tx->mr) { - rvt_put_mr(tx->mr); - tx->mr = NULL; - } - sdma_txclean(dd_from_dev(dev), &tx->txreq); - - /* Free verbs_txreq and return to slab cache */ - kmem_cache_free(dev->verbs_txreq_cache, tx); - - do { - seq = read_seqbegin(&dev->iowait_lock); - if (!list_empty(&dev->txwait)) { - struct iowait *wait; - - write_seqlock_irqsave(&dev->iowait_lock, flags); - /* Wake up first QP wanting a free struct */ - wait = list_first_entry(&dev->txwait, struct iowait, - list); - qp = iowait_to_qp(wait); - priv = qp->priv; - list_del_init(&priv->s_iowait.list); - /* refcount held until actual wake up */ - write_sequnlock_irqrestore(&dev->iowait_lock, flags); - hfi1_qp_wakeup(qp, RVT_S_WAIT_TX); - break; - } - } while (read_seqretry(&dev->iowait_lock, seq)); -} - /* * This is called with progress side lock held. */ @@ -1427,13 +1344,6 @@ static void init_ibport(struct hfi1_pportdata *ppd) RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } -static void verbs_txreq_kmem_cache_ctor(void *obj) -{ - struct verbs_txreq *tx = obj; - - memset(tx, 0, sizeof(*tx)); -} - /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1447,8 +1357,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) unsigned i; int ret; size_t lcpysz = IB_DEVICE_NAME_MAX; - u16 descq_cnt; - char buf[TXREQ_NAME_LEN]; for (i = 0; i < dd->num_pports; i++) init_ibport(ppd + i); @@ -1461,18 +1369,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) INIT_LIST_HEAD(&dev->txwait); INIT_LIST_HEAD(&dev->memwait); - descq_cnt = sdma_get_descq_cnt(); - - snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit); - /* SLAB_HWCACHE_ALIGN for AHG */ - dev->verbs_txreq_cache = kmem_cache_create(buf, - sizeof(struct verbs_txreq), - 0, SLAB_HWCACHE_ALIGN, - verbs_txreq_kmem_cache_ctor); - if (!dev->verbs_txreq_cache) { - ret = -ENOMEM; + ret = verbs_txreq_init(dev); + if (ret) goto err_verbs_txreq; - } /* * The system image GUID is supposed to be the same for all @@ -1578,7 +1477,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) err_class: rvt_unregister_device(&dd->verbs_dev.rdi); err_verbs_txreq: - kmem_cache_destroy(dev->verbs_txreq_cache); + verbs_txreq_exit(dev); dd_dev_err(dd, "cannot register verbs: %d!\n", -ret); return ret; } @@ -1597,7 +1496,7 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) dd_dev_err(dd, "memwait list not empty!\n"); del_timer_sync(&dev->mem_timer); - kmem_cache_destroy(dev->verbs_txreq_cache); + verbs_txreq_exit(dev); } void hfi1_cnp_rcv(struct hfi1_packet *packet) diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/staging/rdma/hfi1/verbs_txreq.c new file mode 100644 index 0000000..bc95c41 --- /dev/null +++ b/drivers/staging/rdma/hfi1/verbs_txreq.c @@ -0,0 +1,149 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "hfi.h" +#include "verbs_txreq.h" +#include "qp.h" +#include "trace.h" + +#define TXREQ_LEN 24 + +void hfi1_put_txreq(struct verbs_txreq *tx) +{ + struct hfi1_ibdev *dev; + struct rvt_qp *qp; + unsigned long flags; + unsigned int seq; + struct hfi1_qp_priv *priv; + + qp = tx->qp; + dev = to_idev(qp->ibqp.device); + + if (tx->mr) + rvt_put_mr(tx->mr); + + sdma_txclean(dd_from_dev(dev), &tx->txreq); + + /* Free verbs_txreq and return to slab cache */ + kmem_cache_free(dev->verbs_txreq_cache, tx); + + do { + seq = read_seqbegin(&dev->iowait_lock); + if (!list_empty(&dev->txwait)) { + struct iowait *wait; + + write_seqlock_irqsave(&dev->iowait_lock, flags); + wait = list_first_entry(&dev->txwait, struct iowait, + list); + qp = iowait_to_qp(wait); + priv = qp->priv; + list_del_init(&priv->s_iowait.list); + /* refcount held until actual wake up */ + write_sequnlock_irqrestore(&dev->iowait_lock, flags); + hfi1_qp_wakeup(qp, RVT_S_WAIT_TX); + break; + } + } while (read_seqretry(&dev->iowait_lock, seq)); +} + +struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, + struct rvt_qp *qp) +{ + struct verbs_txreq *tx = ERR_PTR(-EBUSY); + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + write_seqlock(&dev->iowait_lock); + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { + struct hfi1_qp_priv *priv; + + tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); + if (tx) + goto out; + priv = qp->priv; + if (list_empty(&priv->s_iowait.list)) { + dev->n_txwait++; + qp->s_flags |= RVT_S_WAIT_TX; + list_add_tail(&priv->s_iowait.list, &dev->txwait); + trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); + atomic_inc(&qp->refcount); + } + qp->s_flags &= ~RVT_S_BUSY; + } +out: + write_sequnlock(&dev->iowait_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); + return tx; +} + +static void verbs_txreq_kmem_cache_ctor(void *obj) +{ + struct verbs_txreq *tx = (struct verbs_txreq *)obj; + + memset(tx, 0, sizeof(*tx)); +} + +int verbs_txreq_init(struct hfi1_ibdev *dev) +{ + char buf[TXREQ_LEN]; + struct hfi1_devdata *dd = dd_from_dev(dev); + + snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit); + dev->verbs_txreq_cache = kmem_cache_create(buf, + sizeof(struct verbs_txreq), + 0, SLAB_HWCACHE_ALIGN, + verbs_txreq_kmem_cache_ctor); + if (!dev->verbs_txreq_cache) + return -ENOMEM; + return 0; +} + +void verbs_txreq_exit(struct hfi1_ibdev *dev) +{ + kmem_cache_destroy(dev->verbs_txreq_cache); + dev->verbs_txreq_cache = NULL; +} diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h new file mode 100644 index 0000000..387882a --- /dev/null +++ b/drivers/staging/rdma/hfi1/verbs_txreq.h @@ -0,0 +1,95 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef HFI1_VERBS_TXREQ_H +#define HFI1_VERBS_TXREQ_H + +#include +#include + +#include "verbs.h" +#include "sdma_txreq.h" +#include "iowait.h" + +struct verbs_txreq { + struct hfi1_pio_header phdr; + struct sdma_txreq txreq; + struct rvt_qp *qp; + struct rvt_swqe *wqe; + struct rvt_mregion *mr; + struct rvt_sge_state *ss; + struct sdma_engine *sde; + struct send_context *psc; + u16 hdr_dwords; +}; + +struct hfi1_ibdev; +struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, + struct rvt_qp *qp); + +static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, + struct rvt_qp *qp) +{ + struct verbs_txreq *tx; + + tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); + if (unlikely(!tx)) { + /* call slow path to get the lock */ + tx = __get_txreq(dev, qp); + if (IS_ERR(tx)) + return tx; + } + tx->qp = qp; + tx->mr = NULL; + return tx; +} + +void hfi1_put_txreq(struct verbs_txreq *tx); +int verbs_txreq_init(struct hfi1_ibdev *dev); +void verbs_txreq_exit(struct hfi1_ibdev *dev); + +#endif /* HFI1_VERBS_TXREQ_H */ -- cgit v0.10.2 From bb5df5f9eea6b9efb5911a5fef63b4614af01c89 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:44:43 -0800 Subject: staging/rdma/hfi1: Remove header memcpy from sdma send path. Instead of writing the header into a buffer then copying it into another buffer to be sent, remove that memcpy and instead build the header directly into the tx request that will be sent. 
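For orientation before reading the hunks below, a minimal sketch of the new pattern: the header is built in place inside the verbs_txreq instead of in a private scratch header that is later copied. The function name example_make_req is a hypothetical stand-in for the hfi1_make_{rc,uc,ud}_req() routines touched by this patch; the field and helper names are the ones the patch itself uses.

static int example_make_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_other_headers *ohdr;

	/* the txreq carries the header storage (phdr.hdr) with it */
	ps->s_txreq = get_txreq(ps->dev, qp);
	if (IS_ERR(ps->s_txreq))
		return 0;	/* real code clears RVT_S_BUSY and NULLs s_txreq */

	/* build LRH/BTH directly into the txreq; no staging buffer, no memcpy */
	ohdr = &ps->s_txreq->phdr.hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;

	/* ... fill in ohdr and the rest of ps->s_txreq->phdr.hdr ... */
	return 1;
}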
Reviewed-by: Mike Marciniszyn Signed-off-by: Vennila Megavannan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index bfce812..9523dc1 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -70,6 +70,7 @@ #include "hfi.h" #include "device.h" #include "common.h" +#include "verbs_txreq.h" #include "trace.h" #undef pr_fmt @@ -1682,8 +1683,6 @@ int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { - struct hfi1_qp_priv *priv = qp->priv; - struct ahg_ib_header *ahdr = priv->s_hdr; u32 hdrwords = qp->s_hdrwords; struct rvt_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; @@ -1691,7 +1690,7 @@ int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; struct snoop_packet *s_packet = NULL; - u32 *hdr = (u32 *)&ahdr->ibh; + u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; u32 length = 0; struct rvt_sge_state temp_ss; void *data = NULL; @@ -1702,7 +1701,7 @@ int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct capture_md md; u32 vl; u32 hdr_len = hdrwords << 2; - u32 tlen = HFI1_GET_PKT_LEN(&ahdr->ibh); + u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr); md.u.pbc = 0; @@ -1729,7 +1728,7 @@ int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, md.port = 1; md.dir = PKT_DIR_EGRESS; if (likely(pbc == 0)) { - vl = be16_to_cpu(ahdr->ibh.lrh[0]) >> 12; + vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12; md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen); } else { md.u.pbc = 0; @@ -1791,7 +1790,7 @@ int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ret = HFI1_FILTER_HIT; } else { ret = ppd->dd->hfi1_snoop.filter_callback( - &ahdr->ibh, + &ps->s_txreq->phdr.hdr, NULL, ppd->dd->hfi1_snoop.filter_value); } @@ -1823,9 +1822,16 @@ int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, spin_unlock_irqrestore(&qp->s_lock, flags); } else if (qp->ibqp.qp_type == IB_QPT_RC) { spin_lock_irqsave(&qp->s_lock, flags); - hfi1_rc_send_complete(qp, &ahdr->ibh); + hfi1_rc_send_complete(qp, + &ps->s_txreq->phdr.hdr); spin_unlock_irqrestore(&qp->s_lock, flags); } + + /* + * If snoop is dropping the packet we need to put the + * txreq back because no one else will. + */ + hfi1_put_txreq(ps->s_txreq); return 0; } break; diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index a62c9424..75d70d5 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -54,7 +54,7 @@ #include "hfi.h" #include "qp.h" -#include "sdma.h" +#include "verbs_txreq.h" #include "trace.h" /* cut down ridiculously long IB macro names */ @@ -201,13 +201,15 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, * @qp: a pointer to the QP * @ohdr: a pointer to the IB header being constructed * @pmtu: the path MTU + * @ps: the xmit packet state * * Return 1 if constructed; otherwise, return 0. * Note that we are in the responder's side of the QP context. * Note the QP s_lock must be held. 
*/ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, - struct hfi1_other_headers *ohdr, u32 pmtu) + struct hfi1_other_headers *ohdr, u32 pmtu, + struct hfi1_pkt_state *ps) { struct rvt_ack_entry *e; u32 hwords; @@ -347,7 +349,7 @@ normal: qp->s_rdma_ack_cnt++; qp->s_hdrwords = hwords; qp->s_cur_size = len; - hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle); + hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); return 1; bail: @@ -371,7 +373,7 @@ bail: * * Return 1 if constructed; otherwise, return 0. */ -int hfi1_make_rc_req(struct rvt_qp *qp) +int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); @@ -385,18 +387,21 @@ int hfi1_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - int ret = 0; int middle = 0; int delta; - ohdr = &priv->s_hdr->ibh.u.oth; + ps->s_txreq = get_txreq(ps->dev, qp); + if (IS_ERR(ps->s_txreq)) + goto bail_no_tx; + + ohdr = &ps->s_txreq->phdr.hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &priv->s_hdr->ibh.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && - make_rc_ack(dev, qp, ohdr, pmtu)) - goto done; + make_rc_ack(dev, qp, ohdr, pmtu, ps)) + return 1; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) @@ -415,7 +420,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ? IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); /* will get called again */ - goto done; + goto done_free_tx; } if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK)) @@ -752,12 +757,23 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ohdr, bth0 | (qp->s_state << 24), bth2, - middle); -done: + middle, + ps); return 1; + +done_free_tx: + hfi1_put_txreq(ps->s_txreq); + ps->s_txreq = NULL; + return 1; + bail: + hfi1_put_txreq(ps->s_txreq); + +bail_no_tx: + ps->s_txreq = NULL; qp->s_flags &= ~RVT_S_BUSY; - return ret; + qp->s_hdrwords = 0; + return 0; } /** diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 7c6feff..70d1d34 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -54,6 +54,7 @@ #include "mad.h" #include "qp.h" #include "verbs_txreq.h" +#include "trace.h" /* * Convert the AETH RNR timeout code into the number of microseconds. 
@@ -698,6 +699,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, static inline void build_ahg(struct rvt_qp *qp, u32 npsn) { struct hfi1_qp_priv *priv = qp->priv; + if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR)) clear_ahg(qp); if (!(qp->s_flags & RVT_S_AHG_VALID)) { @@ -740,10 +742,11 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) } void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, - u32 bth0, u32 bth2, int middle) + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps) { - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_ibport *ibp = ps->ibp; u16 lrh0; u32 nwords; u32 extra_bytes; @@ -754,7 +757,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, nwords = (qp->s_cur_size + extra_bytes) >> 2; lrh0 = HFI1_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += hfi1_make_grh(ibp, &priv->s_hdr->ibh.u.l.grh, + qp->s_hdrwords += hfi1_make_grh(ibp, + &ps->s_txreq->phdr.hdr.u.l.grh, &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; @@ -784,11 +788,11 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, build_ahg(qp, bth2); else qp->s_flags &= ~RVT_S_AHG_VALID; - priv->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0); - priv->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - priv->s_hdr->ibh.lrh[2] = + ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); + ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + ps->s_txreq->phdr.hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - priv->s_hdr->ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | qp->remote_ah_attr.src_path_bits); bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; @@ -826,7 +830,7 @@ void hfi1_do_send(struct rvt_qp *qp) { struct hfi1_pkt_state ps; struct hfi1_qp_priv *priv = qp->priv; - int (*make_req)(struct rvt_qp *qp); + int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps); unsigned long flags; unsigned long timeout; unsigned long timeout_int; @@ -906,7 +910,7 @@ void hfi1_do_send(struct rvt_qp *qp) } spin_lock_irqsave(&qp->s_lock, flags); } - } while (make_req(qp)); + } while (make_req(qp, &ps)); spin_unlock_irqrestore(&qp->s_lock, flags); } diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index f884b5c..77431b1 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -49,7 +49,7 @@ */ #include "hfi.h" -#include "sdma.h" +#include "verbs_txreq.h" #include "qp.h" /* cut down ridiculously long IB macro names */ @@ -63,7 +63,7 @@ * * Return 1 if constructed; otherwise, return 0. 
*/ -int hfi1_make_uc_req(struct rvt_qp *qp) +int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; @@ -72,9 +72,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) u32 bth0 = 0; u32 len; u32 pmtu = qp->pmtu; - int ret = 0; int middle = 0; + ps->s_txreq = get_txreq(ps->dev, qp); + if (IS_ERR(ps->s_txreq)) + goto bail_no_tx; + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; @@ -90,12 +93,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) clear_ahg(qp); wqe = rvt_get_swqe_ptr(qp, qp->s_last); hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); - goto done; + goto done_free_tx; } - ohdr = &priv->s_hdr->ibh.u.oth; + ohdr = &ps->s_txreq->phdr.hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &priv->s_hdr->ibh.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; /* Get the next send request. */ wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -235,13 +238,22 @@ int hfi1_make_uc_req(struct rvt_qp *qp) qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), - mask_psn(qp->s_psn++), middle); -done: + mask_psn(qp->s_psn++), middle, ps); + return 1; + +done_free_tx: + hfi1_put_txreq(ps->s_txreq); + ps->s_txreq = NULL; return 1; bail: + hfi1_put_txreq(ps->s_txreq); + +bail_no_tx: + ps->s_txreq = NULL; qp->s_flags &= ~RVT_S_BUSY; - return ret; + qp->s_hdrwords = 0; + return 0; } /** diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index ba78e2e..a7118bc 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -54,6 +54,7 @@ #include "hfi.h" #include "mad.h" #include "qp.h" +#include "verbs_txreq.h" /** * ud_loopback - handle send on loopback QPs @@ -265,7 +266,7 @@ drop: * * Return 1 if constructed; otherwise, return 0. */ -int hfi1_make_ud_req(struct rvt_qp *qp) +int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; @@ -278,10 +279,13 @@ int hfi1_make_ud_req(struct rvt_qp *qp) u32 bth0; u16 lrh0; u16 lid; - int ret = 0; int next_cur; u8 sc5; + ps->s_txreq = get_txreq(ps->dev, qp); + if (IS_ERR(ps->s_txreq)) + goto bail_no_tx; + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; @@ -296,7 +300,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) } wqe = rvt_get_swqe_ptr(qp, qp->s_last); hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); - goto done; + goto done_free_tx; } /* see post_one_send() */ @@ -337,7 +341,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); hfi1_send_complete(qp, wqe, IB_WC_SUCCESS); - goto done; + goto done_free_tx; } } @@ -359,11 +363,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp) if (ah_attr->ah_flags & IB_AH_GRH) { /* Header size in 32-bit words. */ - qp->s_hdrwords += hfi1_make_grh(ibp, &priv->s_hdr->ibh.u.l.grh, - &ah_attr->grh, - qp->s_hdrwords, nwords); + qp->s_hdrwords += hfi1_make_grh(ibp, + &ps->s_txreq->phdr.hdr.u.l.grh, + &ah_attr->grh, + qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; - ohdr = &priv->s_hdr->ibh.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -371,7 +376,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) } else { /* Header size in 32-bit words. 
*/ lrh0 = HFI1_LRH_BTH; - ohdr = &priv->s_hdr->ibh.u.oth; + ohdr = &ps->s_txreq->phdr.hdr.u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -389,19 +394,20 @@ int hfi1_make_ud_req(struct rvt_qp *qp) priv->s_sc = sc5; } priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); - priv->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0); - priv->s_hdr->ibh.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - priv->s_hdr->ibh.lrh[2] = + ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); + ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); + ps->s_txreq->phdr.hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) - priv->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE; - else { + if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { + ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; + } else { lid = ppd->lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); - priv->s_hdr->ibh.lrh[3] = cpu_to_be16(lid); - } else - priv->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE; + ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid); + } else { + ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; + } } if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; @@ -426,11 +432,21 @@ int hfi1_make_ud_req(struct rvt_qp *qp) priv->s_hdr->tx_flags = 0; priv->s_hdr->sde = NULL; -done: return 1; + +done_free_tx: + hfi1_put_txreq(ps->s_txreq); + ps->s_txreq = NULL; + return 1; + bail: + hfi1_put_txreq(ps->s_txreq); + +bail_no_tx: + ps->s_txreq = NULL; qp->s_flags &= ~RVT_S_BUSY; - return ret; + qp->s_hdrwords = 0; + return 0; } /* diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 7838b21..8cf1d6b 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -622,8 +622,7 @@ bail_txadd: * NOTE: DMA mapping is held in the tx until completed in the ring or * the tx desc is freed without having been submitted to the ring * - * This routine insures the following all the helper routine - * calls succeed. + * This routine ensures all the helper routine calls succeed. 
*/ /* New API */ static int build_verbs_tx_desc( @@ -635,10 +634,9 @@ static int build_verbs_tx_desc( u64 pbc) { int ret = 0; - struct hfi1_pio_header *phdr; + struct hfi1_pio_header *phdr = &tx->phdr; u16 hdrbytes = tx->hdr_dwords << 2; - phdr = &tx->phdr; if (!ahdr->ahgcount) { ret = sdma_txinit_ahg( &tx->txreq, @@ -652,29 +650,14 @@ static int build_verbs_tx_desc( if (ret) goto bail_txadd; phdr->pbc = cpu_to_le64(pbc); - memcpy(&phdr->hdr, &ahdr->ibh, hdrbytes - sizeof(phdr->pbc)); - /* add the header */ ret = sdma_txadd_kvaddr( sde->dd, &tx->txreq, - &tx->phdr, - tx->hdr_dwords << 2); + phdr, + hdrbytes); if (ret) goto bail_txadd; } else { - struct hfi1_other_headers *sohdr = &ahdr->ibh.u.oth; - struct hfi1_other_headers *dohdr = &phdr->hdr.u.oth; - - /* needed in rc_send_complete() */ - phdr->hdr.lrh[0] = ahdr->ibh.lrh[0]; - if ((be16_to_cpu(phdr->hdr.lrh[0]) & 3) == HFI1_LRH_GRH) { - sohdr = &ahdr->ibh.u.l.oth; - dohdr = &phdr->hdr.u.l.oth; - } - /* opcode */ - dohdr->bth[0] = sohdr->bth[0]; - /* PSN/ACK */ - dohdr->bth[2] = sohdr->bth[2]; ret = sdma_txinit_ahg( &tx->txreq, ahdr->tx_flags, @@ -712,6 +695,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u8 sc5 = priv->s_sc; int ret; + struct hfi1_ibdev *tdev; if (!list_empty(&priv->s_iowait.tx_head)) { stx = list_first_entry( @@ -726,7 +710,10 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, return ret; } - tx = get_txreq(dev, qp); + tx = ps->s_txreq; + + tdev = to_idev(qp->ibqp.device); + if (IS_ERR(tx)) goto bail_tx; @@ -748,7 +735,8 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc); if (unlikely(ret)) goto bail_build; - trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh); + trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), + &ps->s_txreq->phdr.hdr); ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); if (unlikely(ret == -ECOMM)) goto bail_ecomm; @@ -824,27 +812,29 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { struct hfi1_qp_priv *priv = qp->priv; - struct ahg_ib_header *ahdr = priv->s_hdr; u32 hdrwords = qp->s_hdrwords; struct rvt_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; u32 dwords = (len + 3) >> 2; u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; - u32 *hdr = (u32 *)&ahdr->ibh; + u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; u64 pbc_flags = 0; u32 sc5; unsigned long flags = 0; struct send_context *sc; struct pio_buf *pbuf; int wc_status = IB_WC_SUCCESS; + int ret = 0; /* vl15 special case taken care of in ud.c */ sc5 = priv->s_sc; sc = qp_to_send_context(qp, sc5); - if (!sc) - return -EINVAL; + if (!sc) { + ret = -EINVAL; + goto bail; + } if (likely(pbc == 0)) { u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ @@ -872,7 +862,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, * so lets continue to queue the request. */ hfi1_cdbg(PIO, "alloc failed. 
state active, queuing"); - return no_bufs_available(qp, sc); + ret = no_bufs_available(qp, sc); + goto bail; } } @@ -895,7 +886,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } } - trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh); + trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), + &ps->s_txreq->phdr.hdr); if (qp->s_rdma_mr) { rvt_put_mr(qp->s_rdma_mr); @@ -909,10 +901,15 @@ pio_bail: spin_unlock_irqrestore(&qp->s_lock, flags); } else if (qp->ibqp.qp_type == IB_QPT_RC) { spin_lock_irqsave(&qp->s_lock, flags); - hfi1_rc_send_complete(qp, &ahdr->ibh); + hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr); spin_unlock_irqrestore(&qp->s_lock, flags); } - return 0; + + ret = 0; + +bail: + hfi1_put_txreq(ps->s_txreq); + return ret; } /* @@ -1011,8 +1008,6 @@ bad: int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); - struct hfi1_qp_priv *priv = qp->priv; - struct ahg_ib_header *ahdr = priv->s_hdr; int ret; int pio = 0; unsigned long flags = 0; @@ -1026,7 +1021,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) !(dd->flags & HFI1_HAS_SEND_DMA)) pio = 1; - ret = egress_pkey_check(dd->pport, &ahdr->ibh, qp); + ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp); if (unlikely(ret)) { /* * The value we are returning here does not get propagated to diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index d00c55d..73f471a 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -194,13 +195,6 @@ struct hfi1_pio_header { } __packed; /* - * used for force cacheline alignment for AHG - */ -struct tx_pio_header { - struct hfi1_pio_header phdr; -} ____cacheline_aligned; - -/* * hfi1 specific data structures that will be hidden from rvt after the queue * pair is made common */ @@ -222,6 +216,7 @@ struct hfi1_pkt_state { struct hfi1_ibdev *dev; struct hfi1_ibport *ibp; struct hfi1_pportdata *ppd; + struct verbs_txreq *s_txreq; }; #define HFI1_PSN_CREDIT 16 @@ -436,7 +431,8 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, - u32 bth0, u32 bth2, int middle); + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps); void _hfi1_do_send(struct work_struct *work); @@ -447,11 +443,11 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn); -int hfi1_make_rc_req(struct rvt_qp *qp); +int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps); -int hfi1_make_uc_req(struct rvt_qp *qp); +int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps); -int hfi1_make_ud_req(struct rvt_qp *qp); +int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps); int hfi1_register_ib_device(struct hfi1_devdata *); diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h index 387882a..d89d29b 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.h +++ b/drivers/staging/rdma/hfi1/verbs_txreq.h @@ -63,7 +63,6 @@ struct verbs_txreq { struct rvt_mregion *mr; struct rvt_sge_state *ss; struct sdma_engine *sde; - struct send_context *psc; u16 hdr_dwords; }; -- cgit v0.10.2 From c239a5b5d6617b8bdae401f86529cab76313f3e7 Mon Sep 17 00:00:00 2001 From: Mike 
Marciniszyn Date: Sun, 14 Feb 2016 12:44:52 -0800 Subject: staging/rdma/hfi1: remove s_rdma_mr It can be conveyed in the verbs_txreq. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 75d70d5..c075e85 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -266,9 +266,9 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, goto bail; } /* Copy SGE state in case we need to resend */ - qp->s_rdma_mr = e->rdma_sge.mr; - if (qp->s_rdma_mr) - rvt_get_mr(qp->s_rdma_mr); + ps->s_txreq->mr = e->rdma_sge.mr; + if (ps->s_txreq->mr) + rvt_get_mr(ps->s_txreq->mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; qp->s_cur_sge = &qp->s_ack_rdma_sge; @@ -305,9 +305,9 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): qp->s_cur_sge = &qp->s_ack_rdma_sge; - qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; - if (qp->s_rdma_mr) - rvt_get_mr(qp->s_rdma_mr); + ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; + if (ps->s_txreq->mr) + rvt_get_mr(ps->s_txreq->mr); len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) { len = pmtu; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 8cf1d6b..dc8eb6b 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -728,9 +728,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); } tx->wqe = qp->s_wqe; - tx->mr = qp->s_rdma_mr; - if (qp->s_rdma_mr) - qp->s_rdma_mr = NULL; tx->hdr_dwords = hdrwords + 2; ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc); if (unlikely(ret)) @@ -889,11 +886,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ps->s_txreq->phdr.hdr); - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - pio_bail: if (qp->s_wqe) { spin_lock_irqsave(&qp->s_lock, flags); -- cgit v0.10.2 From 721d04273a8265847612a420174bb6e9a13d8d4f Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 12:45:00 -0800 Subject: staging/rdma/hfi1: Add s_sendcontext priv field s_sendcontext will be used to map the QPs to the send contexts for PIO. 
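As a rough sketch of how the new field is meant to be populated (condensed from the ud.c hunk below; example_cache_egress is a hypothetical helper name, not something the patch adds):

static void example_cache_egress(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_qp_priv *priv = qp->priv;

	/* remember the SC and the egress resources derived from it */
	priv->s_sc = sc5;
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
	priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
}

The same qp_to_send_context() lookup is what the new checks in hfi1_check_modify_qp() validate, so a QP cannot be modified to use an SC that has no usable send context.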
Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 7387ef5..571e78f 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -191,6 +191,9 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) return -EINVAL; + + if (!qp_to_send_context(qp, sc)) + return -EINVAL; } if (attr_mask & IB_QP_ALT_PATH) { @@ -201,6 +204,9 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) return -EINVAL; + + if (!qp_to_send_context(qp, sc)) + return -EINVAL; } return 0; @@ -608,11 +614,13 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) struct rvt_qp *qp = iter->qp; struct hfi1_qp_priv *priv = qp->priv; struct sdma_engine *sde; + struct send_context *send_context; sde = qp_to_sdma_engine(qp, priv->s_sc); wqe = rvt_get_swqe_ptr(qp, qp->s_last); + send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u %u SDE %p,%u\n", + "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -641,7 +649,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_retry_cnt, qp->s_rnr_retry_cnt, sde, - sde ? sde->this_idx : 0); + sde ? sde->this_idx : 0, + send_context); } void qp_comm_est(struct rvt_qp *qp) diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index a7118bc..1b4b191 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -394,6 +394,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) priv->s_sc = sc5; } priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); + priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); ps->s_txreq->phdr.hdr.lrh[2] = diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 73f471a..3d25ad4 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -199,10 +199,11 @@ struct hfi1_pio_header { * pair is made common */ struct hfi1_qp_priv { - struct ahg_ib_header *s_hdr; /* next packet header to send */ - struct sdma_engine *s_sde; /* current sde */ - u8 s_sc; /* SC[0..4] for next packet */ - u8 r_adefered; /* number of acks defered */ + struct ahg_ib_header *s_hdr; /* next header to send */ + struct sdma_engine *s_sde; /* current sde */ + struct send_context *s_sendcontext; /* current sendcontext */ + u8 s_sc; /* SC[0..4] for next packet */ + u8 r_adefered; /* number of acks defered */ struct iowait s_iowait; struct timer_list s_rnr_timer; struct rvt_qp *owner; -- cgit v0.10.2 From 1235bef8f04bf020b03f32e083e34bc91fc51343 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:09 -0800 Subject: staging/rdma/hfi1: avoid passing pmtu It is in the qp. 
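The mechanical shape of the change, as a sketch of the resulting signature only (the body here is elided, not the real one): the extra parameter disappears and the value is read from the QP at the point of use.

static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
		       struct hfi1_other_headers *ohdr,
		       struct hfi1_pkt_state *ps)
{
	u32 pmtu = qp->pmtu;	/* taken from the QP instead of a parameter */

	/* ... build the ACK/response as before, using pmtu ... */
	return 0;
}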
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index c075e85..d54d3ad 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -200,7 +200,6 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, * @dev: the device for this QP * @qp: a pointer to the QP * @ohdr: a pointer to the IB header being constructed - * @pmtu: the path MTU * @ps: the xmit packet state * * Return 1 if constructed; otherwise, return 0. @@ -208,7 +207,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, * Note the QP s_lock must be held. */ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, - struct hfi1_other_headers *ohdr, u32 pmtu, + struct hfi1_other_headers *ohdr, struct hfi1_pkt_state *ps) { struct rvt_ack_entry *e; @@ -217,6 +216,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, u32 bth0; u32 bth2; int middle = 0; + u32 pmtu = qp->pmtu; /* Don't send an ACK if we aren't supposed to. */ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) @@ -400,7 +400,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && - make_rc_ack(dev, qp, ohdr, pmtu, ps)) + make_rc_ack(dev, qp, ohdr, ps)) return 1; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { -- cgit v0.10.2 From 711e104ddca7b609889e1edf0a8482673ea4a7cc Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:18 -0800 Subject: staging/rdma/hfi1: fix panic in send engine The send engine wasn't correctly handling pre-built packets, and worse, the pointer to a packet state's txreq wasn't initialized correctly. To fix: - all waiters need to save any prebuilt packets (smda waits already did) - the progress routine needs to handle a QPs prebuilt packet and initialize the txreq pointer properly To keep SDMA working, the dma send code needs to see if a packet has been built already. If not the code will build it. 
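A condensed sketch of the fixed flow (example_verbs_send_dma is a hypothetical condensation of hfi1_verbs_send_dma(); the helpers it calls are the ones added or changed by the hunks below):

static int example_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
				  struct rvt_sge_state *ss, u32 len,
				  struct ahg_ib_header *ahdr, u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	/* hfi1_do_send() primes this via get_waiting_verbs_txreq(qp) */
	struct verbs_txreq *tx = ps->s_txreq;
	int ret;

	/* a waiter may already have built this packet; don't build it twice */
	if (!sdma_txreq_built(&tx->txreq)) {
		ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
		if (unlikely(ret))
			return ret;	/* real code queues the txreq on the kmem waitlist */
	}
	return sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
}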
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h index e8ba560..e007eb8 100644 --- a/drivers/staging/rdma/hfi1/iowait.h +++ b/drivers/staging/rdma/hfi1/iowait.h @@ -54,6 +54,7 @@ #include #include +#include "sdma_txreq.h" /* * typedef (*restart_t)() - restart callback * @work: pointer to work structure @@ -185,4 +186,23 @@ static inline void iowait_drain_wakeup(struct iowait *wait) wake_up(&wait->wait_dma); } +/** + * iowait_get_txhead() - get packet off of iowait list + * + * @wait wait struture + */ +static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait) +{ + struct sdma_txreq *tx = NULL; + + if (!list_empty(&wait->tx_head)) { + tx = list_first_entry( + &wait->tx_head, + struct sdma_txreq, + list); + list_del_init(&tx->list); + } + return tx; +} + #endif diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index d54d3ad..2704287 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -348,6 +348,8 @@ normal: } qp->s_rdma_ack_cnt++; qp->s_hdrwords = hwords; + /* pbc */ + ps->s_txreq->hdr_dwords = hwords + 2; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); return 1; @@ -750,6 +752,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } qp->s_len -= len; qp->s_hdrwords = hwords; + /* pbc */ + ps->s_txreq->hdr_dwords = hwords + 2; qp->s_cur_sge = ss; qp->s_cur_size = len; hfi1_make_ruc_header( diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 70d1d34..70f42c9 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -879,6 +879,8 @@ void hfi1_do_send(struct rvt_qp *qp) timeout = jiffies + (timeout_int) / 8; cpu = priv->s_sde ? priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); + /* insure a pre-built packet is handled */ + ps.s_txreq = get_waiting_verbs_txreq(qp); do { /* Check for a constructed packet to be sent. 
*/ if (qp->s_hdrwords != 0) { diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/staging/rdma/hfi1/sdma_txreq.h index d0f77a8..2effb35 100644 --- a/drivers/staging/rdma/hfi1/sdma_txreq.h +++ b/drivers/staging/rdma/hfi1/sdma_txreq.h @@ -127,4 +127,9 @@ struct sdma_txreq { struct sdma_desc descs[NUM_DESC]; }; +static inline int sdma_txreq_built(struct sdma_txreq *tx) +{ + return tx->num_desc; +} + #endif /* HFI1_SDMA_TXREQ_H */ diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 77431b1..3270561 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -235,6 +235,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } qp->s_len -= len; qp->s_hdrwords = hwords; + /* pbc */ + ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 1b4b191..bae5ccd 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -53,8 +53,8 @@ #include "hfi.h" #include "mad.h" -#include "qp.h" #include "verbs_txreq.h" +#include "qp.h" /** * ud_loopback - handle send on loopback QPs @@ -394,7 +394,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) priv->s_sc = sc5; } priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); + ps->s_txreq->sde = priv->s_sde; priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); + ps->s_txreq->psc = priv->s_sendcontext; ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); ps->s_txreq->phdr.hdr.lrh[2] = @@ -432,6 +434,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) priv->s_hdr->ahgidx = 0; priv->s_hdr->tx_flags = 0; priv->s_hdr->sde = NULL; + /* pbc */ + ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; return 1; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index dc8eb6b..229dde5 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -547,7 +547,9 @@ static void verbs_sdma_complete( hfi1_put_txreq(tx); } -static int wait_kmem(struct hfi1_ibdev *dev, struct rvt_qp *qp) +static int wait_kmem(struct hfi1_ibdev *dev, + struct rvt_qp *qp, + struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; unsigned long flags; @@ -556,6 +558,8 @@ static int wait_kmem(struct hfi1_ibdev *dev, struct rvt_qp *qp) spin_lock_irqsave(&qp->s_lock, flags); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { write_seqlock(&dev->iowait_lock); + list_add_tail(&ps->s_txreq->txreq.list, + &priv->s_iowait.tx_head); if (list_empty(&priv->s_iowait.list)) { if (list_empty(&dev->memwait)) mod_timer(&dev->mem_timer, jiffies + 1); @@ -578,7 +582,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, struct rvt_qp *qp) * * Add failures will revert the sge cursor */ -static int build_verbs_ulp_payload( +static noinline int build_verbs_ulp_payload( struct sdma_engine *sde, struct rvt_sge_state *ss, u32 length, @@ -690,48 +694,30 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; struct verbs_txreq *tx; - struct sdma_txreq *stx; u64 pbc_flags = 0; u8 sc5 = priv->s_sc; int ret; - struct hfi1_ibdev *tdev; - - if (!list_empty(&priv->s_iowait.tx_head)) { - stx = list_first_entry( - &priv->s_iowait.tx_head, - struct sdma_txreq, - list); - list_del_init(&stx->list); - tx = 
container_of(stx, struct verbs_txreq, txreq); - ret = sdma_send_txreq(tx->sde, &priv->s_iowait, stx); - if (unlikely(ret == -ECOMM)) - goto bail_ecomm; - return ret; - } tx = ps->s_txreq; - - tdev = to_idev(qp->ibqp.device); - - if (IS_ERR(tx)) - goto bail_tx; - - tx->sde = priv->s_sde; - - if (likely(pbc == 0)) { - u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); - /* No vl15 here */ - /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; - - pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); + if (!sdma_txreq_built(&tx->txreq)) { + if (likely(pbc == 0)) { + u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); + /* No vl15 here */ + /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ + pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + + pbc = create_pbc(ppd, + pbc_flags, + qp->srate_mbps, + vl, + plen); + } + tx->wqe = qp->s_wqe; + ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc); + if (unlikely(ret)) + goto bail_build; } - tx->wqe = qp->s_wqe; - tx->hdr_dwords = hdrwords + 2; - ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc); - if (unlikely(ret)) - goto bail_build; trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ps->s_txreq->phdr.hdr); ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); @@ -743,18 +729,22 @@ bail_ecomm: /* The current one got "sent" */ return 0; bail_build: - /* kmalloc or mapping fail */ - hfi1_put_txreq(tx); - return wait_kmem(dev, qp); -bail_tx: - return PTR_ERR(tx); + ret = wait_kmem(dev, qp, ps); + if (!ret) { + /* free txreq - bad state */ + hfi1_put_txreq(ps->s_txreq); + ps->s_txreq = NULL; + } + return ret; } /* * If we are now in the error state, return zero to flush the * send work request. */ -static int no_bufs_available(struct rvt_qp *qp, struct send_context *sc) +static int no_bufs_available(struct rvt_qp *qp, + struct send_context *sc, + struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; @@ -771,6 +761,8 @@ static int no_bufs_available(struct rvt_qp *qp, struct send_context *sc) spin_lock_irqsave(&qp->s_lock, flags); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { write_seqlock(&dev->iowait_lock); + list_add_tail(&ps->s_txreq->txreq.list, + &priv->s_iowait.tx_head); if (list_empty(&priv->s_iowait.list)) { struct hfi1_ibdev *dev = &dd->verbs_dev; int was_empty; @@ -859,8 +851,11 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, * so lets continue to queue the request. */ hfi1_cdbg(PIO, "alloc failed. 
state active, queuing"); - ret = no_bufs_available(qp, sc); - goto bail; + ret = no_bufs_available(qp, sc, ps); + if (!ret) + goto bail; + /* tx consumed in wait */ + return ret; } } diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h index d89d29b..f56149e 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.h +++ b/drivers/staging/rdma/hfi1/verbs_txreq.h @@ -63,6 +63,7 @@ struct verbs_txreq { struct rvt_mregion *mr; struct rvt_sge_state *ss; struct sdma_engine *sde; + struct send_context *psc; u16 hdr_dwords; }; @@ -74,6 +75,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) { struct verbs_txreq *tx; + struct hfi1_qp_priv *priv = qp->priv; tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); if (unlikely(!tx)) { @@ -84,9 +86,24 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, } tx->qp = qp; tx->mr = NULL; + tx->sde = priv->s_sde; + tx->psc = priv->s_sendcontext; + /* so that we can test if the sdma decriptors are there */ + tx->txreq.num_desc = 0; return tx; } +static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp) +{ + struct sdma_txreq *stx; + struct hfi1_qp_priv *priv = qp->priv; + + stx = iowait_get_txhead(&priv->s_iowait); + if (stx) + return container_of(stx, struct verbs_txreq, txreq); + return NULL; +} + void hfi1_put_txreq(struct verbs_txreq *tx); int verbs_txreq_init(struct hfi1_ibdev *dev); void verbs_txreq_exit(struct hfi1_ibdev *dev); -- cgit v0.10.2 From 4f8cc5c04f9445c4b1ef82769b5c0a0f1f8713c9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:27 -0800 Subject: staging/rdma/hfi1: use u8 for vl/sl The use should match the universal container size. Reviewed-by: Ira Weiny Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 229dde5..a4f8b26 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -809,7 +809,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_pportdata *ppd = ps->ppd; u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; u64 pbc_flags = 0; - u32 sc5; + u8 sc5; unsigned long flags = 0; struct send_context *sc; struct pio_buf *pbuf; @@ -825,7 +825,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, goto bail; } if (likely(pbc == 0)) { - u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); + u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); -- cgit v0.10.2 From 14553ca11039732bcba3c160a26d702dbe71dd49 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:36 -0800 Subject: staging/rdma/hfi1: Adaptive PIO for short messages The change requires a new pio_busy field in the iowait structure to track the number of outstanding pios. The new counter together with the sdma counter serve as the basis for a packet by packet decision as to which egress mechanism to use. Since packets given to different egress mechanisms are not ordered, this scheme will preserve the order. The iowait drain/wait mechanisms are extended for a pio case. An additional qp wait flag is added for the PIO drain wait case. 
Currently the only pio wait is for buffers, so the no_bufs_available() routine name is changed to pio_wait() and a third argument is passed with one of the two pio wait flags to generalize the routine. A module parameter is added to hold a configurable threshold. For now, the module parameter is zero. A heuristic routine is added to return the func pointer of the proper egress routine to use. The heuristic is as follows: - SMI always uses pio - GSI,UD qps <= threshold use pio - UD qps > threadhold use sdma o No coordination with sdma is required because order is not required and this qp pio count is not maintained for UD - RC/UC ONLY packets <= threshold chose as follows: o If sdmas pending, use SDMA o Otherwise use pio and enable the pio tracking count at the time the pio buffer is allocated - RC/UC ONLY packets > threshold use SDMA o If pio's are pending the pio_wait with the new wait flag is called to delay for pios to drain The threshold is potentially reduced by the QP's mtu. The sc_buffer_alloc() has two additional args (a callback, a void *) which are exploited by the RC/UC cases to pass a new complete routine and a qp *. When the shadow ring completes the credit associated with a packet, the new complete routine is called. The verbs_pio_complete() will then decrement the busy count and trigger any drain waiters in qp destroy or reset. Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 1294617..36e8e3e 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -1588,6 +1588,14 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry, return dd->verbs_dev.n_piowait; } +static u64 access_sw_pio_drain(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + return dd->verbs_dev.n_piodrain; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4129,6 +4137,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, access_sw_pio_wait), +[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL, + access_sw_pio_drain), [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL, access_sw_kmem_wait), [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL, diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index b86c220..6c581e0 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -800,6 +800,7 @@ enum { C_SW_CPU_RCV_LIM, C_SW_VTX_WAIT, C_SW_PIO_WAIT, + C_SW_PIO_DRAIN, C_SW_KMEM_WAIT, C_SW_SEND_SCHED, C_SDMA_DESC_FETCHED_CNT, diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 702723b..43d4861 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -811,6 +811,7 @@ struct sdma_vl_map; #define BOARD_VERS_MAX 96 /* how long the version string can be */ #define SERIAL_MAX 16 /* length of the serial number */ +typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64); struct hfi1_devdata { struct hfi1_ibdev verbs_dev; /* must be first */ struct list_head list; @@ -1121,10 +1122,8 @@ struct hfi1_devdata { * Handlers for outgoing data so that snoop/capture does not * have to have its hooks in the send path */ - int 
(*process_pio_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); - int (*process_dma_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); + send_routine process_pio_send; + send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h index e007eb8..b5eb1e0 100644 --- a/drivers/staging/rdma/hfi1/iowait.h +++ b/drivers/staging/rdma/hfi1/iowait.h @@ -55,6 +55,7 @@ #include #include "sdma_txreq.h" + /* * typedef (*restart_t)() - restart callback * @work: pointer to work structure @@ -71,6 +72,7 @@ struct sdma_engine; * @wakeup: space callback * @iowork: workqueue overhead * @wait_dma: wait for sdma_busy == 0 + * @wait_pio: wait for pio_busy == 0 * @sdma_busy: # of packets in flight * @count: total number of descriptors in tx_head'ed list * @tx_limit: limit for overflow queuing @@ -104,7 +106,9 @@ struct iowait { void (*wakeup)(struct iowait *wait, int reason); struct work_struct iowork; wait_queue_head_t wait_dma; + wait_queue_head_t wait_pio; atomic_t sdma_busy; + atomic_t pio_busy; u32 count; u32 tx_limit; u32 tx_count; @@ -141,7 +145,9 @@ static inline void iowait_init( INIT_LIST_HEAD(&wait->tx_head); INIT_WORK(&wait->iowork, func); init_waitqueue_head(&wait->wait_dma); + init_waitqueue_head(&wait->wait_pio); atomic_set(&wait->sdma_busy, 0); + atomic_set(&wait->pio_busy, 0); wait->tx_limit = tx_limit; wait->sleep = sleep; wait->wakeup = wakeup; @@ -175,6 +181,88 @@ static inline void iowait_sdma_drain(struct iowait *wait) } /** + * iowait_sdma_pending() - return sdma pending count + * + * @wait: iowait structure + * + */ +static inline int iowait_sdma_pending(struct iowait *wait) +{ + return atomic_read(&wait->sdma_busy); +} + +/** + * iowait_sdma_inc - note sdma io pending + * @wait: iowait structure + */ +static inline void iowait_sdma_inc(struct iowait *wait) +{ + atomic_inc(&wait->sdma_busy); +} + +/** + * iowait_sdma_add - add count to pending + * @wait: iowait structure + */ +static inline void iowait_sdma_add(struct iowait *wait, int count) +{ + atomic_add(count, &wait->sdma_busy); +} + +/** + * iowait_sdma_dec - note sdma complete + * @wait: iowait structure + */ +static inline int iowait_sdma_dec(struct iowait *wait) +{ + return atomic_dec_and_test(&wait->sdma_busy); +} + +/** + * iowait_pio_drain() - wait for pios to drain + * + * @wait: iowait structure + * + * This will delay until the iowait pios have + * completed. 
+ */ +static inline void iowait_pio_drain(struct iowait *wait) +{ + wait_event_timeout(wait->wait_pio, + !atomic_read(&wait->pio_busy), + HZ); +} + +/** + * iowait_pio_pending() - return pio pending count + * + * @wait: iowait structure + * + */ +static inline int iowait_pio_pending(struct iowait *wait) +{ + return atomic_read(&wait->pio_busy); +} + +/** + * iowait_pio_inc - note pio pending + * @wait: iowait structure + */ +static inline void iowait_pio_inc(struct iowait *wait) +{ + atomic_inc(&wait->pio_busy); +} + +/** + * iowait_sdma_dec - note pio complete + * @wait: iowait structure + */ +static inline int iowait_pio_dec(struct iowait *wait) +{ + return atomic_dec_and_test(&wait->pio_busy); +} + +/** * iowait_drain_wakeup() - trigger iowait_drain() waiter * * @wait: iowait structure @@ -184,6 +272,7 @@ static inline void iowait_sdma_drain(struct iowait *wait) static inline void iowait_drain_wakeup(struct iowait *wait) { wake_up(&wait->wait_dma); + wake_up(&wait->wait_pio); } /** diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index be0dcc3..f5aab0e 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -1564,7 +1564,8 @@ full: write_sequnlock_irqrestore(&dev->iowait_lock, flags); for (i = 0; i < n; i++) - hfi1_qp_wakeup(qps[i], RVT_S_WAIT_PIO); + hfi1_qp_wakeup(qps[i], + RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 571e78f..c7b83d6 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -359,6 +359,25 @@ void _hfi1_schedule_send(struct rvt_qp *qp) cpumask_first(cpumask_of_node(dd->node))); } +static void qp_pio_drain(struct rvt_qp *qp) +{ + struct hfi1_ibdev *dev; + struct hfi1_qp_priv *priv = qp->priv; + + if (!priv->s_sendcontext) + return; + dev = to_idev(qp->ibqp.device); + while (iowait_pio_pending(&priv->s_iowait)) { + write_seqlock_irq(&dev->iowait_lock); + hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); + write_sequnlock_irq(&dev->iowait_lock); + iowait_pio_drain(&priv->s_iowait); + write_seqlock_irq(&dev->iowait_lock); + hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); + write_sequnlock_irq(&dev->iowait_lock); + } +} + /** * hfi1_schedule_send - schedule progress * @qp: the QP @@ -620,7 +639,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p\n", + "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -630,7 +649,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe ? wqe->wr.opcode : 0, qp->s_hdrwords, qp->s_flags, - atomic_read(&priv->s_iowait.sdma_busy), + iowait_sdma_pending(&priv->s_iowait), + iowait_pio_pending(&priv->s_iowait), !list_empty(&priv->s_iowait.list), qp->timeout, wqe ? 
wqe->ssn : 0, @@ -739,6 +759,7 @@ void quiesce_qp(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; iowait_sdma_drain(&priv->s_iowait); + qp_pio_drain(qp); flush_tx_list(qp); } diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 2704287..443fda8 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -181,6 +181,18 @@ void hfi1_del_timers_sync(struct rvt_qp *qp) del_timer_sync(&priv->s_rnr_timer); } +/* only opcode mask for adaptive pio */ +const u32 rc_only_opcode = + BIT(OP(SEND_ONLY) & 0x1f) | + BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | + BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | + BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) | + BIT(OP(RDMA_READ_REQUEST & 0x1f)) | + BIT(OP(ACKNOWLEDGE & 0x1f)) | + BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) | + BIT(OP(COMPARE_SWAP & 0x1f)) | + BIT(OP(FETCH_ADD & 0x1f)); + static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { @@ -217,6 +229,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, u32 bth2; int middle = 0; u32 pmtu = qp->pmtu; + struct hfi1_qp_priv *priv = qp->priv; /* Don't send an ACK if we aren't supposed to. */ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) @@ -350,6 +363,7 @@ normal: qp->s_hdrwords = hwords; /* pbc */ ps->s_txreq->hdr_dwords = hwords + 2; + ps->s_txreq->sde = priv->s_sde; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); return 1; @@ -413,7 +427,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&priv->s_iowait.sdma_busy)) { + if (iowait_sdma_pending(&priv->s_iowait)) { qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } @@ -754,6 +768,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords = hwords; /* pbc */ ps->s_txreq->hdr_dwords = hwords + 2; + ps->s_txreq->sde = priv->s_sde; qp->s_cur_sge = ss; qp->s_cur_size = len; hfi1_make_ruc_header( diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 579d821..ff38fa3 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -410,7 +410,7 @@ static void sdma_flush(struct sdma_engine *sde) #endif sdma_txclean(sde->dd, txp); if (wait) - drained = atomic_dec_and_test(&wait->sdma_busy); + drained = iowait_sdma_dec(wait); if (txp->complete) (*txp->complete)(txp, SDMA_TXREQ_S_ABORTED, drained); if (wait && drained) @@ -584,7 +584,7 @@ static void sdma_flush_descq(struct sdma_engine *sde) /* remove from list */ sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL; if (wait) - drained = atomic_dec_and_test(&wait->sdma_busy); + drained = iowait_sdma_dec(wait); #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER trace_hfi1_sdma_out_sn(sde, txp->sn); if (WARN_ON_ONCE(sde->head_sn != txp->sn)) @@ -1498,7 +1498,7 @@ retry: /* remove from list */ sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL; if (wait) - drained = atomic_dec_and_test(&wait->sdma_busy); + drained = iowait_sdma_dec(wait); #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER trace_hfi1_sdma_out_sn(sde, txp->sn); if (WARN_ON_ONCE(sde->head_sn != txp->sn)) @@ -2092,14 +2092,14 @@ retry: goto nodesc; tail = submit_tx(sde, tx); if (wait) - atomic_inc(&wait->sdma_busy); + iowait_sdma_inc(wait); sdma_update_tail(sde, tail); unlock: spin_unlock_irqrestore(&sde->tail_lock, flags); return ret; unlock_noconn: if (wait) - atomic_inc(&wait->sdma_busy); + iowait_sdma_inc(wait); 
tx->next_descq_idx = 0; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER tx->sn = sde->tail_sn++; @@ -2181,7 +2181,7 @@ retry: } update_tail: if (wait) - atomic_add(count, &wait->sdma_busy); + iowait_sdma_add(wait, count); if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); @@ -2192,7 +2192,7 @@ unlock_noconn: tx->wait = wait; list_del_init(&tx->list); if (wait) - atomic_inc(&wait->sdma_busy); + iowait_sdma_inc(wait); tx->next_descq_idx = 0; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER tx->sn = sde->tail_sn++; diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 3270561..e58ec15 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -55,6 +55,13 @@ /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_UC_##x +/* only opcode mask for adaptive pio */ +const u32 uc_only_opcode = + BIT(OP(SEND_ONLY) & 0x1f) | + BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | + BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | + BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)); + /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP @@ -86,7 +93,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&priv->s_iowait.sdma_busy)) { + if (iowait_sdma_pending(&priv->s_iowait)) { qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } @@ -237,6 +244,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords = hwords; /* pbc */ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; + ps->s_txreq->sde = priv->s_sde; qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index bae5ccd..da4e465 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -294,7 +294,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&priv->s_iowait.sdma_busy)) { + if (iowait_sdma_pending(&priv->s_iowait)) { qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } @@ -331,7 +331,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) * Instead of waiting, we could queue a * zero length descriptor so we get a callback. */ - if (atomic_read(&priv->s_iowait.sdma_busy)) { + if (iowait_sdma_pending(&priv->s_iowait)) { qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index a4f8b26..d900374 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -124,11 +124,20 @@ unsigned int hfi1_max_srq_wrs = 0x1FFFF; module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO); MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); +unsigned short piothreshold; +module_param(piothreshold, ushort, S_IRUGO); +MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. 
pio"); + static void verbs_sdma_complete( struct sdma_txreq *cookie, int status, int drained); +static int pio_wait(struct rvt_qp *qp, + struct send_context *sc, + struct hfi1_pkt_state *ps, + u32 flag); + /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 @@ -742,9 +751,10 @@ bail_build: * If we are now in the error state, return zero to flush the * send work request. */ -static int no_bufs_available(struct rvt_qp *qp, - struct send_context *sc, - struct hfi1_pkt_state *ps) +static int pio_wait(struct rvt_qp *qp, + struct send_context *sc, + struct hfi1_pkt_state *ps, + u32 flag) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; @@ -767,8 +777,10 @@ static int no_bufs_available(struct rvt_qp *qp, struct hfi1_ibdev *dev = &dd->verbs_dev; int was_empty; + dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); + dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); dev->n_piowait++; - qp->s_flags |= RVT_S_WAIT_PIO; + qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); @@ -797,6 +809,15 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) return dd->vld[vl].sc; } +static void verbs_pio_complete(void *arg, int code) +{ + struct rvt_qp *qp = (struct rvt_qp *)arg; + struct hfi1_qp_priv *priv = qp->priv; + + if (iowait_pio_dec(&priv->s_iowait)) + iowait_drain_wakeup(&priv->s_iowait); +} + int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { @@ -815,6 +836,17 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct pio_buf *pbuf; int wc_status = IB_WC_SUCCESS; int ret = 0; + pio_release_cb cb = NULL; + + /* only RC/UC use complete */ + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + cb = verbs_pio_complete; + break; + default: + break; + } /* vl15 special case taken care of in ud.c */ sc5 = priv->s_sc; @@ -830,8 +862,12 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); } - pbuf = sc_buffer_alloc(sc, plen, NULL, NULL); + if (cb) + iowait_pio_inc(&priv->s_iowait); + pbuf = sc_buffer_alloc(sc, plen, cb, qp); if (unlikely(pbuf == NULL)) { + if (cb) + verbs_pio_complete(qp, 0); if (ppd->host_link_state != HLS_UP_ACTIVE) { /* * If we have filled the PIO buffers to capacity and are @@ -851,8 +887,9 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, * so lets continue to queue the request. */ hfi1_cdbg(PIO, "alloc failed. 
state active, queuing"); - ret = no_bufs_available(qp, sc, ps); + ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO); if (!ret) + /* txreq not queued - free */ goto bail; /* tx consumed in wait */ return ret; @@ -985,6 +1022,48 @@ bad: } /** + * get_send_routine - choose an egress routine + * + * Choose an egress routine based on QP type + * and size + */ +static inline send_routine get_send_routine(struct rvt_qp *qp, + struct hfi1_ib_header *h) +{ + struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_qp_priv *priv = qp->priv; + + if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) + return dd->process_pio_send; + switch (qp->ibqp.qp_type) { + case IB_QPT_SMI: + return dd->process_pio_send; + case IB_QPT_GSI: + case IB_QPT_UD: + if (piothreshold && qp->s_cur_size <= piothreshold) + return dd->process_pio_send; + break; + case IB_QPT_RC: + if (piothreshold && + qp->s_cur_size <= min(piothreshold, qp->pmtu) && + (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) && + iowait_sdma_pending(&priv->s_iowait) == 0) + return dd->process_pio_send; + break; + case IB_QPT_UC: + if (piothreshold && + qp->s_cur_size <= min(piothreshold, qp->pmtu) && + (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) && + iowait_sdma_pending(&priv->s_iowait) == 0) + return dd->process_pio_send; + break; + default: + break; + } + return dd->process_dma_send; +} + +/** * hfi1_verbs_send - send a packet * @qp: the QP to send on * @ps: the state of the packet to send @@ -995,19 +1074,10 @@ bad: int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + send_routine sr; int ret; - int pio = 0; - unsigned long flags = 0; - - /* - * VL15 packets (IB_QPT_SMI) will always use PIO, so we - * can defer SDMA restart until link goes ACTIVE without - * worrying about just how we got there. - */ - if ((qp->ibqp.qp_type == IB_QPT_SMI) || - !(dd->flags & HFI1_HAS_SEND_DMA)) - pio = 1; + sr = get_send_routine(qp, &ps->s_txreq->phdr.hdr); ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp); if (unlikely(ret)) { /* @@ -1018,7 +1088,9 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) * mechanism for handling the errors. So for SDMA we can just * return. */ - if (pio) { + if (sr == dd->process_pio_send) { + unsigned long flags; + hfi1_cdbg(PIO, "%s() Failed. 
Completing with err", __func__); spin_lock_irqsave(&qp->s_lock, flags); @@ -1027,20 +1099,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } return -EINVAL; } - - if (pio) { - ret = dd->process_pio_send(qp, ps, 0); - } else { -#ifdef CONFIG_SDMA_VERBOSITY - dd_dev_err(dd, "CONFIG SDMA %s:%d %s()\n", - slashstrip(__FILE__), __LINE__, __func__); - dd_dev_err(dd, "SDMA hdrwords = %u, len = %u\n", qp->s_hdrwords, - qp->s_cur_size); -#endif - ret = dd->process_dma_send(qp, ps, 0); - } - - return ret; + return sr(qp, ps, 0); } /** diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 3d25ad4..8f1fde8 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -265,6 +265,7 @@ struct hfi1_ibdev { struct timer_list mem_timer; u64 n_piowait; + u64 n_piodrain; u64 n_txwait; u64 n_kmem_wait; @@ -425,6 +426,19 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); +extern const u32 rc_only_opcode; +extern const u32 uc_only_opcode; + +static inline u8 get_opcode(struct hfi1_ib_header *h) +{ + u16 lnh = be16_to_cpu(h->lrh[0]) & 3; + + if (lnh == IB_LNH_IBA_LOCAL) + return be32_to_cpu(h->u.oth.bth[0]) >> 24; + else + return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; +} + int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); @@ -494,6 +508,8 @@ extern unsigned int hfi1_max_srq_sges; extern unsigned int hfi1_max_srq_wrs; +extern unsigned short piothreshold; + extern const u32 ib_hfi1_rnr_table[]; #endif /* HFI1_VERBS_H */ diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h index f56149e..1cf69b2 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.h +++ b/drivers/staging/rdma/hfi1/verbs_txreq.h @@ -93,6 +93,11 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, return tx; } +static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx) +{ + return &tx->txreq; +} + static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp) { struct sdma_txreq *stx; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 5c307ed..f2f4df0 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -82,6 +82,7 @@ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating * next send completion entry not via send DMA * RVT_S_WAIT_PIO - waiting for a send buffer to be available + * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available * RVT_S_WAIT_KMEM - waiting for kernel memory to be available @@ -101,16 +102,17 @@ #define RVT_S_WAIT_SSN_CREDIT 0x0100 #define RVT_S_WAIT_DMA 0x0200 #define RVT_S_WAIT_PIO 0x0400 -#define RVT_S_WAIT_TX 0x0800 -#define RVT_S_WAIT_DMA_DESC 0x1000 -#define RVT_S_WAIT_KMEM 0x2000 -#define RVT_S_WAIT_PSN 0x4000 -#define RVT_S_WAIT_ACK 0x8000 -#define RVT_S_SEND_ONE 0x10000 -#define RVT_S_UNLIMITED_CREDIT 0x20000 -#define RVT_S_AHG_VALID 0x40000 -#define RVT_S_AHG_CLEAR 0x80000 -#define RVT_S_ECN 0x100000 +#define RVT_S_WAIT_PIO_DRAIN 0x0800 +#define RVT_S_WAIT_TX 0x1000 +#define RVT_S_WAIT_DMA_DESC 0x2000 +#define RVT_S_WAIT_KMEM 0x4000 +#define RVT_S_WAIT_PSN 0x8000 +#define RVT_S_WAIT_ACK 0x10000 +#define RVT_S_SEND_ONE 0x20000 +#define RVT_S_UNLIMITED_CREDIT 0x40000 +#define RVT_S_AHG_VALID 0x80000 +#define 
RVT_S_AHG_CLEAR 0x100000 +#define RVT_S_ECN 0x200000 /* * Wait flags that would prevent any packet type from being sent. -- cgit v0.10.2 From 91702b4a39fb566b78f2ef1cea8bf6ed3fe9f4a6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:44 -0800 Subject: IB/qib, staging/rdma/hfi1, IB/rdmavt: progress selection changes The non-rdamvt versions of qib and hfi1 allow for a differing heuristic to override a schedule progress in favor of a direct call the the progress routine. This patch adds that for both drivers and rdmavt. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 6ffa022..575b737 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -484,12 +484,13 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth) * the ring but after the wqe has been * setup. * - * Returns 0 on success, -EINVAL on failure + * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure */ int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe) { struct rvt_ah *ah; + int ret = 0; switch (qp->ibqp.qp_type) { case IB_QPT_RC: @@ -503,11 +504,13 @@ int qib_check_send_wqe(struct rvt_qp *qp, ah = ibah_to_rvtah(wqe->ud_wr.ah); if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; + /* progress hint */ + ret = 1; break; default: break; } - return 0; + return ret; } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index dbf124d..ef82abf 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1430,7 +1430,9 @@ static inline u32 qp_get_savail(struct rvt_qp *qp) * @qp: the QP to post on * @wr: the work request to send */ -static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +static int rvt_post_one_wr(struct rvt_qp *qp, + struct ib_send_wr *wr, + int *call_send) { struct rvt_swqe *wqe; u32 next; @@ -1532,8 +1534,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) /* general part of wqe valid - allow for driver checks */ if (rdi->driver_f.check_send_wqe) { ret = rdi->driver_f.check_send_wqe(qp, wqe); - if (ret) + if (ret < 0) goto bail_inval_free; + if (ret) + *call_send = ret; } log_pmtu = qp->log_pmtu; @@ -1606,7 +1610,7 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; for (; wr; wr = wr->next) { - err = rvt_post_one_wr(qp, wr); + err = rvt_post_one_wr(qp, wr, &call_send); if (unlikely(err)) { *bad_wr = wr; goto bail; diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index c7b83d6..2d15705 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -73,6 +73,7 @@ static int iowait_sleep( struct sdma_txreq *stx, unsigned seq); static void iowait_wakeup(struct iowait *wait, int reason); +static void qp_pio_drain(struct rvt_qp *qp); static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off) @@ -272,7 +273,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, default: break; } - return 0; + return wqe->length <= piothreshold; } /** -- cgit v0.10.2 From a545f5308b6cf476def8a9326f7e82f89623bb03 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:53 -0800 Subject: staging/rdma/hfi: fix CQ completion order issue The current implementation of the sdma_wait variable has a timing hole that can cause a completion Q entry 
to be returned from a pio send prior to an older sdma packets completion queue entry. The sdma_wait variable used to be decremented prior to calling the packet complete routine. The hole is between decrement and the verbs completion where send engine using pio could return a out of order completion in that window. This patch closes the hole by allowing an API option to specify an sdma_drained callback. The atomic dec is positioned after the complete callback to avoid the window as long as the pio path doesn't execute when there is a non-zero sdma count. Reviewed-by: Jubin John Signed-off-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h index b5eb1e0..2cb3f04 100644 --- a/drivers/staging/rdma/hfi1/iowait.h +++ b/drivers/staging/rdma/hfi1/iowait.h @@ -69,7 +69,8 @@ struct sdma_engine; * @list: used to add/insert into QP/PQ wait lists * @tx_head: overflow list of sdma_txreq's * @sleep: no space callback - * @wakeup: space callback + * @wakeup: space callback wakeup + * @sdma_drained: sdma count drained * @iowork: workqueue overhead * @wait_dma: wait for sdma_busy == 0 * @wait_pio: wait for pio_busy == 0 @@ -104,6 +105,7 @@ struct iowait { struct sdma_txreq *tx, unsigned seq); void (*wakeup)(struct iowait *wait, int reason); + void (*sdma_drained)(struct iowait *wait); struct work_struct iowork; wait_queue_head_t wait_dma; wait_queue_head_t wait_pio; @@ -122,7 +124,7 @@ struct iowait { * @tx_limit: limit for overflow queuing * @func: restart function for workqueue * @sleep: sleep function for no space - * @wakeup: wakeup function for no space + * @resume: wakeup function for no space * * This function initializes the iowait * structure embedded in the QP or PQ. @@ -138,7 +140,8 @@ static inline void iowait_init( struct iowait *wait, struct sdma_txreq *tx, unsigned seq), - void (*wakeup)(struct iowait *wait, int reason)) + void (*wakeup)(struct iowait *wait, int reason), + void (*sdma_drained)(struct iowait *wait)) { wait->count = 0; INIT_LIST_HEAD(&wait->list); @@ -151,6 +154,7 @@ static inline void iowait_init( wait->tx_limit = tx_limit; wait->sleep = sleep; wait->wakeup = wakeup; + wait->sdma_drained = sdma_drained; } /** @@ -273,6 +277,8 @@ static inline void iowait_drain_wakeup(struct iowait *wait) { wake_up(&wait->wait_dma); wake_up(&wait->wait_pio); + if (wait->sdma_drained) + wait->sdma_drained(wait); } /** diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 2d15705..77e91f2 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -73,6 +73,7 @@ static int iowait_sleep( struct sdma_txreq *stx, unsigned seq); static void iowait_wakeup(struct iowait *wait, int reason); +static void iowait_sdma_drained(struct iowait *wait); static void qp_pio_drain(struct rvt_qp *qp); static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, @@ -509,6 +510,22 @@ static void iowait_wakeup(struct iowait *wait, int reason) hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC); } +static void iowait_sdma_drained(struct iowait *wait) +{ + struct rvt_qp *qp = iowait_to_qp(wait); + + /* + * This happens when the send engine notes + * a QP in the error state and cannot + * do the flush work until that QP's + * sdma work has finished. 
+ */ + if (qp->s_flags & RVT_S_WAIT_DMA) { + qp->s_flags &= ~RVT_S_WAIT_DMA; + hfi1_schedule_send(qp); + } +} + /** * * qp_to_sdma_engine - map a qp to a send engine @@ -773,7 +790,8 @@ void notify_qp_reset(struct rvt_qp *qp) 1, _hfi1_do_send, iowait_sleep, - iowait_wakeup); + iowait_wakeup, + iowait_sdma_drained); priv->r_adefered = 0; clear_ahg(qp); } diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index ff38fa3..e79f931 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -361,6 +361,28 @@ static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt) write_sde_csr(sde, SD(DESC_CNT), reg); } +static inline void complete_tx(struct sdma_engine *sde, + struct sdma_txreq *tx, + int res) +{ + /* protect against complete modifying */ + struct iowait *wait = tx->wait; + callback_t complete = tx->complete; + +#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER + trace_hfi1_sdma_out_sn(sde, txp->sn); + if (WARN_ON_ONCE(sde->head_sn != txp->sn)) + dd_dev_err(sde->dd, "expected %llu got %llu\n", + sde->head_sn, txp->sn); + sde->head_sn++; +#endif + sdma_txclean(sde->dd, tx); + if (complete) + (*complete)(tx, res); + if (iowait_sdma_dec(wait) && wait) + iowait_drain_wakeup(wait); +} + /* * Complete all the sdma requests with a SDMA_TXREQ_S_ABORTED status * @@ -395,27 +417,8 @@ static void sdma_flush(struct sdma_engine *sde) } spin_unlock_irqrestore(&sde->flushlist_lock, flags); /* flush from flush list */ - list_for_each_entry_safe(txp, txp_next, &flushlist, list) { - int drained = 0; - /* protect against complete modifying */ - struct iowait *wait = txp->wait; - - list_del_init(&txp->list); -#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER - trace_hfi1_sdma_out_sn(sde, txp->sn); - if (WARN_ON_ONCE(sde->head_sn != txp->sn)) - dd_dev_err(sde->dd, "expected %llu got %llu\n", - sde->head_sn, txp->sn); - sde->head_sn++; -#endif - sdma_txclean(sde->dd, txp); - if (wait) - drained = iowait_sdma_dec(wait); - if (txp->complete) - (*txp->complete)(txp, SDMA_TXREQ_S_ABORTED, drained); - if (wait && drained) - iowait_drain_wakeup(wait); - } + list_for_each_entry_safe(txp, txp_next, &flushlist, list) + complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED); } /* @@ -577,31 +580,10 @@ static void sdma_flush_descq(struct sdma_engine *sde) head = ++sde->descq_head & sde->sdma_mask; /* if now past this txp's descs, do the callback */ if (txp && txp->next_descq_idx == head) { - int drained = 0; - /* protect against complete modifying */ - struct iowait *wait = txp->wait; - /* remove from list */ sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL; - if (wait) - drained = iowait_sdma_dec(wait); -#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER - trace_hfi1_sdma_out_sn(sde, txp->sn); - if (WARN_ON_ONCE(sde->head_sn != txp->sn)) - dd_dev_err(sde->dd, "expected %llu got %llu\n", - sde->head_sn, txp->sn); - sde->head_sn++; -#endif - sdma_txclean(sde->dd, txp); + complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED); trace_hfi1_sdma_progress(sde, head, tail, txp); - if (txp->complete) - (*txp->complete)( - txp, - SDMA_TXREQ_S_ABORTED, - drained); - if (wait && drained) - iowait_drain_wakeup(wait); - /* see if there is another txp */ txp = get_txhead(sde); } progress++; @@ -1470,7 +1452,7 @@ static void sdma_make_progress(struct sdma_engine *sde, u64 status) { struct sdma_txreq *txp = NULL; int progress = 0; - u16 hwhead, swhead, swtail; + u16 hwhead, swhead; int idle_check_done = 0; hwhead = sdma_gethead(sde); @@ -1491,29 +1473,9 @@ retry: /* if now past this txp's descs, do the callback */ if (txp && 
txp->next_descq_idx == swhead) { - int drained = 0; - /* protect against complete modifying */ - struct iowait *wait = txp->wait; - /* remove from list */ sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL; - if (wait) - drained = iowait_sdma_dec(wait); -#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER - trace_hfi1_sdma_out_sn(sde, txp->sn); - if (WARN_ON_ONCE(sde->head_sn != txp->sn)) - dd_dev_err(sde->dd, "expected %llu got %llu\n", - sde->head_sn, txp->sn); - sde->head_sn++; -#endif - sdma_txclean(sde->dd, txp); - if (txp->complete) - (*txp->complete)( - txp, - SDMA_TXREQ_S_OK, - drained); - if (wait && drained) - iowait_drain_wakeup(wait); + complete_tx(sde, txp, SDMA_TXREQ_S_OK); /* see if there is another txp */ txp = get_txhead(sde); } @@ -1531,6 +1493,8 @@ retry: * of sdma_make_progress(..) which is ensured by idle_check_done flag */ if ((status & sde->idle_mask) && !idle_check_done) { + u16 swtail; + swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask; if (swtail != hwhead) { hwhead = (u16)read_sde_csr(sde, SD(HEAD)); diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 76ed215..f24b5a1 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -555,7 +555,7 @@ static inline int sdma_txinit_ahg( u8 num_ahg, u32 *ahg, u8 ahg_hlen, - void (*cb)(struct sdma_txreq *, int, int)) + void (*cb)(struct sdma_txreq *, int)) { if (tlen == 0) return -ENODATA; @@ -618,7 +618,7 @@ static inline int sdma_txinit( struct sdma_txreq *tx, u16 flags, u16 tlen, - void (*cb)(struct sdma_txreq *, int, int)) + void (*cb)(struct sdma_txreq *, int)) { return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb); } diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/staging/rdma/hfi1/sdma_txreq.h index 2effb35..bf7d777 100644 --- a/drivers/staging/rdma/hfi1/sdma_txreq.h +++ b/drivers/staging/rdma/hfi1/sdma_txreq.h @@ -93,7 +93,7 @@ struct sdma_desc { #define SDMA_TXREQ_F_USE_AHG 0x0004 struct sdma_txreq; -typedef void (*callback_t)(struct sdma_txreq *, int, int); +typedef void (*callback_t)(struct sdma_txreq *, int); struct iowait; struct sdma_txreq { diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index ac90309..dfa9ef2 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -273,7 +273,7 @@ struct user_sdma_txreq { static int user_sdma_send_pkts(struct user_sdma_request *, unsigned); static int num_user_pages(const struct iovec *); -static void user_sdma_txreq_cb(struct sdma_txreq *, int, int); +static void user_sdma_txreq_cb(struct sdma_txreq *, int); static inline void pq_update(struct hfi1_user_sdma_pkt_q *); static void user_sdma_free_request(struct user_sdma_request *, bool); static int pin_vector_pages(struct user_sdma_request *, @@ -388,7 +388,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) init_waitqueue_head(&pq->wait); iowait_init(&pq->busy, 0, NULL, defer_packet_queue, - activate_packet_queue); + activate_packet_queue, NULL); pq->reqidx = 0; snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt, fd->subctxt); @@ -1341,8 +1341,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, * tx request have been processed by the DMA engine. Called in * interrupt context. 
*/ -static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status, - int drain) +static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) { struct user_sdma_txreq *tx = container_of(txreq, struct user_sdma_txreq, txreq); diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index d900374..3141966 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -130,8 +130,7 @@ MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio"); static void verbs_sdma_complete( struct sdma_txreq *cookie, - int status, - int drained); + int status); static int pio_wait(struct rvt_qp *qp, struct send_context *sc, @@ -523,8 +522,7 @@ void update_sge(struct rvt_sge_state *ss, u32 length) /* New API */ static void verbs_sdma_complete( struct sdma_txreq *cookie, - int status, - int drained) + int status) { struct verbs_txreq *tx = container_of(cookie, struct verbs_txreq, txreq); @@ -539,18 +537,6 @@ static void verbs_sdma_complete( hdr = &tx->phdr.hdr; hfi1_rc_send_complete(qp, hdr); } - if (drained) { - /* - * This happens when the send engine notes - * a QP in the error state and cannot - * do the flush work until that QP's - * sdma work has finished. - */ - if (qp->s_flags & RVT_S_WAIT_DMA) { - qp->s_flags &= ~RVT_S_WAIT_DMA; - hfi1_schedule_send(qp); - } - } spin_unlock(&qp->s_lock); hfi1_put_txreq(tx); -- cgit v0.10.2 From 35f6befc8441d20724a41bafc810b7c8f5a92986 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 12:46:10 -0800 Subject: staging/rdma/hfi1: Add qp to send context mapping for PIO PIO send context mapping is changed from per-VL to QPN based. qp to send context mapping is done using a mapping infrastructure similar to the current vl to sdma engine mapping. Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 43d4861..4d5a18e 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -841,6 +841,12 @@ struct hfi1_devdata { spinlock_t sc_lock; /* Per VL data. Enough for all VLs but not all elements are set/used. 
*/ struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; + /* lock for pio_map */ + spinlock_t pio_map_lock; + /* array of kernel send contexts */ + struct send_context **kernel_send_context; + /* array of vl maps */ + struct pio_vl_map __rcu *pio_map; /* seqlock for sc2vl */ seqlock_t sc2vl_lock; u64 sc2vl[4]; diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 112cb6c..423c699 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1050,6 +1050,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) mutex_init(&dd->qsfp_i2c_mutex); seqlock_init(&dd->sc2vl_lock); spin_lock_init(&dd->sde_map_lock); + spin_lock_init(&dd->pio_map_lock); init_waitqueue_head(&dd->event_queue); dd->int_counter = alloc_percpu(u64); @@ -1317,6 +1318,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) } } kfree(tmp); + free_pio_map(dd); /* must follow rcv context free - need to remove rcv's hooks */ for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++) sc_free(dd->send_contexts[ctxt].sc); diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index f5aab0e..69bbe22 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -312,7 +312,7 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd) if (i == SC_ACK) { count = dd->n_krcv_queues; } else if (i == SC_KERNEL) { - count = num_vls + 1 /* VL15 */; + count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */; } else if (count == SCC_PER_CPU) { count = dd->num_rcv_contexts - dd->n_krcv_queues; } else if (count < 0) { @@ -1687,11 +1687,217 @@ done: spin_unlock(&dd->sc_lock); } +/* + * pio_select_send_context_vl() - select send context + * @dd: devdata + * @selector: a spreading factor + * @vl: this vl + * + * This function returns a send context based on the selector and a vl. + * The mapping fields are protected by RCU + */ +struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd, + u32 selector, u8 vl) +{ + struct pio_vl_map *m; + struct pio_map_elem *e; + struct send_context *rval; + + /* + * NOTE This should only happen if SC->VL changed after the initial + * checks on the QP/AH + * Default will return VL0's send context below + */ + if (unlikely(vl >= num_vls)) { + rval = NULL; + goto done; + } + + rcu_read_lock(); + m = rcu_dereference(dd->pio_map); + if (unlikely(!m)) { + rcu_read_unlock(); + return dd->vld[0].sc; + } + e = m->map[vl & m->mask]; + rval = e->ksc[selector & e->mask]; + rcu_read_unlock(); + +done: + rval = !rval ? 
dd->vld[0].sc : rval; + return rval; +} + +/* + * pio_select_send_context_sc() - select send context + * @dd: devdata + * @selector: a spreading factor + * @sc5: the 5 bit sc + * + * This function returns an send context based on the selector and an sc + */ +struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd, + u32 selector, u8 sc5) +{ + u8 vl = sc_to_vlt(dd, sc5); + + return pio_select_send_context_vl(dd, selector, vl); +} + +/* + * Free the indicated map struct + */ +static void pio_map_free(struct pio_vl_map *m) +{ + int i; + + for (i = 0; m && i < m->actual_vls; i++) + kfree(m->map[i]); + kfree(m); +} + +/* + * Handle RCU callback + */ +static void pio_map_rcu_callback(struct rcu_head *list) +{ + struct pio_vl_map *m = container_of(list, struct pio_vl_map, list); + + pio_map_free(m); +} + +/* + * pio_map_init - called when #vls change + * @dd: hfi1_devdata + * @port: port number + * @num_vls: number of vls + * @vl_scontexts: per vl send context mapping (optional) + * + * This routine changes the mapping based on the number of vls. + * + * vl_scontexts is used to specify a non-uniform vl/send context + * loading. NULL implies auto computing the loading and giving each + * VL an uniform distribution of send contexts per VL. + * + * The auto algorithm computers the sc_per_vl and the number of extra + * send contexts. Any extra send contexts are added from the last VL + * on down + * + * rcu locking is used here to control access to the mapping fields. + * + * If either the num_vls or num_send_contexts are non-power of 2, the + * array sizes in the struct pio_vl_map and the struct pio_map_elem are + * rounded up to the next highest power of 2 and the first entry is + * reused in a round robin fashion. + * + * If an error occurs the map change is not done and the mapping is not + * chaged. + * + */ +int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) +{ + int i, j; + int extra, sc_per_vl; + int scontext = 1; + int num_kernel_send_contexts = 0; + u8 lvl_scontexts[OPA_MAX_VLS]; + struct pio_vl_map *oldmap, *newmap; + + if (!vl_scontexts) { + /* send context 0 reserved for VL15 */ + for (i = 1; i < dd->num_send_contexts; i++) + if (dd->send_contexts[i].type == SC_KERNEL) + num_kernel_send_contexts++; + /* truncate divide */ + sc_per_vl = num_kernel_send_contexts / num_vls; + /* extras */ + extra = num_kernel_send_contexts % num_vls; + vl_scontexts = lvl_scontexts; + /* add extras from last vl down */ + for (i = num_vls - 1; i >= 0; i--, extra--) + vl_scontexts[i] = sc_per_vl + (extra > 0 ? 
1 : 0); + } + /* build new map */ + newmap = kzalloc(sizeof(*newmap) + + roundup_pow_of_two(num_vls) * + sizeof(struct pio_map_elem *), + GFP_KERNEL); + if (!newmap) + goto bail; + newmap->actual_vls = num_vls; + newmap->vls = roundup_pow_of_two(num_vls); + newmap->mask = (1 << ilog2(newmap->vls)) - 1; + for (i = 0; i < newmap->vls; i++) { + /* save for wrap around */ + int first_scontext = scontext; + + if (i < newmap->actual_vls) { + int sz = roundup_pow_of_two(vl_scontexts[i]); + + /* only allocate once */ + newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) + + sz * sizeof(struct + send_context *), + GFP_KERNEL); + if (!newmap->map[i]) + goto bail; + newmap->map[i]->mask = (1 << ilog2(sz)) - 1; + /* assign send contexts */ + for (j = 0; j < sz; j++) { + if (dd->kernel_send_context[scontext]) + newmap->map[i]->ksc[j] = + dd->kernel_send_context[scontext]; + if (++scontext >= first_scontext + + vl_scontexts[i]) + /* wrap back to first send context */ + scontext = first_scontext; + } + } else { + /* just re-use entry without allocating */ + newmap->map[i] = newmap->map[i % num_vls]; + } + scontext = first_scontext + vl_scontexts[i]; + } + /* newmap in hand, save old map */ + spin_lock_irq(&dd->pio_map_lock); + oldmap = rcu_dereference_protected(dd->pio_map, + lockdep_is_held(&dd->pio_map_lock)); + + /* publish newmap */ + rcu_assign_pointer(dd->pio_map, newmap); + + spin_unlock_irq(&dd->pio_map_lock); + /* success, free any old map after grace period */ + if (oldmap) + call_rcu(&oldmap->list, pio_map_rcu_callback); + return 0; +bail: + /* free any partial allocation */ + pio_map_free(newmap); + return -ENOMEM; +} + +void free_pio_map(struct hfi1_devdata *dd) +{ + /* Free PIO map if allocated */ + if (rcu_access_pointer(dd->pio_map)) { + spin_lock_irq(&dd->pio_map_lock); + kfree(rcu_access_pointer(dd->pio_map)); + RCU_INIT_POINTER(dd->pio_map, NULL); + spin_unlock_irq(&dd->pio_map_lock); + synchronize_rcu(); + } + kfree(dd->kernel_send_context); + dd->kernel_send_context = NULL; +} + int init_pervl_scs(struct hfi1_devdata *dd) { int i; - u64 mask, all_vl_mask = (u64) 0x80ff; /* VLs 0-7, 15 */ + u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */ + u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */ u32 ctxt; + struct hfi1_pportdata *ppd = dd->pport; dd->vld[15].sc = sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node); @@ -1699,6 +1905,12 @@ int init_pervl_scs(struct hfi1_devdata *dd) goto nomem; hfi1_init_ctxt(dd->vld[15].sc); dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); + + dd->kernel_send_context = kmalloc_node(dd->num_send_contexts * + sizeof(struct send_context *), + GFP_KERNEL, dd->node); + dd->kernel_send_context[0] = dd->vld[15].sc; + for (i = 0; i < num_vls; i++) { /* * Since this function does not deal with a specific @@ -1711,12 +1923,19 @@ int init_pervl_scs(struct hfi1_devdata *dd) dd->rcd[0]->rcvhdrqentsize, dd->node); if (!dd->vld[i].sc) goto nomem; - + dd->kernel_send_context[i + 1] = dd->vld[i].sc; hfi1_init_ctxt(dd->vld[i].sc); - /* non VL15 start with the max MTU */ dd->vld[i].mtu = hfi1_max_mtu; } + for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { + dd->kernel_send_context[i + 1] = + sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node); + if (!dd->kernel_send_context[i + 1]) + goto nomem; + hfi1_init_ctxt(dd->kernel_send_context[i + 1]); + } + sc_enable(dd->vld[15].sc); ctxt = dd->vld[15].sc->hw_context; mask = all_vl_mask & ~(1LL << 15); @@ -1724,17 +1943,29 @@ int init_pervl_scs(struct hfi1_devdata *dd) dd_dev_info(dd, "Using send context %u(%u) 
for VL15\n", dd->vld[15].sc->sw_index, ctxt); + for (i = 0; i < num_vls; i++) { sc_enable(dd->vld[i].sc); ctxt = dd->vld[i].sc->hw_context; - mask = all_vl_mask & ~(1LL << i); + mask = all_vl_mask & ~(data_vls_mask); write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); } + for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { + sc_enable(dd->kernel_send_context[i + 1]); + ctxt = dd->kernel_send_context[i + 1]->hw_context; + mask = all_vl_mask & ~(data_vls_mask); + write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); + } + + if (pio_map_init(dd, ppd->port - 1, num_vls, NULL)) + goto nomem; return 0; nomem: sc_free(dd->vld[15].sc); for (i = 0; i < num_vls; i++) sc_free(dd->vld[i].sc); + for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) + sc_free(dd->kernel_send_context[i + 1]); return -ENOMEM; } diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h index 53d3e0a..1dedeb2 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/staging/rdma/hfi1/pio.h @@ -165,6 +165,112 @@ struct sc_config_sizes { short int count; }; +/* + * The diagram below details the relationship of the mapping structures + * + * Since the mapping now allows for non-uniform send contexts per vl, the + * number of send contexts for a vl is either the vl_scontexts[vl] or + * a computation based on num_kernel_send_contexts/num_vls: + * + * For example: + * nactual = vl_scontexts ? vl_scontexts[vl] : num_kernel_send_contexts/num_vls + * + * n = roundup to next highest power of 2 using nactual + * + * In the case where there are num_kernel_send_contexts/num_vls doesn't divide + * evenly, the extras are added from the last vl downward. + * + * For the case where n > nactual, the send contexts are assigned + * in a round robin fashion wrapping back to the first send context + * for a particular vl. 
+ * + * dd->pio_map + * | pio_map_elem[0] + * | +--------------------+ + * v | mask | + * pio_vl_map |--------------------| + * +--------------------------+ | ksc[0] -> sc 1 | + * | list (RCU) | |--------------------| + * |--------------------------| ->| ksc[1] -> sc 2 | + * | mask | --/ |--------------------| + * |--------------------------| -/ | * | + * | actual_vls (max 8) | -/ |--------------------| + * |--------------------------| --/ | ksc[n] -> sc n | + * | vls (max 8) | -/ +--------------------+ + * |--------------------------| --/ + * | map[0] |-/ + * |--------------------------| +--------------------+ + * | map[1] |--- | mask | + * |--------------------------| \---- |--------------------| + * | * | \-- | ksc[0] -> sc 1+n | + * | * | \---- |--------------------| + * | * | \->| ksc[1] -> sc 2+n | + * |--------------------------| |--------------------| + * | map[vls - 1] |- | * | + * +--------------------------+ \- |--------------------| + * \- | ksc[m] -> sc m+n | + * \ +--------------------+ + * \- + * \ + * \- +--------------------+ + * \- | mask | + * \ |--------------------| + * \- | ksc[0] -> sc 1+m+n | + * \- |--------------------| + * >| ksc[1] -> sc 2+m+n | + * |--------------------| + * | * | + * |--------------------| + * | ksc[o] -> sc o+m+n | + * +--------------------+ + * + */ + +/* Initial number of send contexts per VL */ +#define INIT_SC_PER_VL 2 + +/* + * struct pio_map_elem - mapping for a vl + * @mask - selector mask + * @ksc - array of kernel send contexts for this vl + * + * The mask is used to "mod" the selector to + * produce index into the trailing array of + * kscs + */ +struct pio_map_elem { + u32 mask; + struct send_context *ksc[0]; +}; + +/* + * struct pio_vl_map - mapping for a vl + * @list - rcu head for free callback + * @mask - vl mask to "mod" the vl to produce an index to map array + * @actual_vls - number of vls + * @vls - numbers of vls rounded to next power of 2 + * @map - array of pio_map_elem entries + * + * This is the parent mapping structure. The trailing members of the + * struct point to pio_map_elem entries, which in turn point to an + * array of kscs for that vl. 
+ */ +struct pio_vl_map { + struct rcu_head list; + u32 mask; + u8 actual_vls; + u8 vls; + struct pio_map_elem *map[0]; +}; + +int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, + u8 *vl_scontexts); +void free_pio_map(struct hfi1_devdata *dd); +struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd, + u32 selector, u8 vl); +struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd, + u32 selector, u8 sc5); + /* send context functions */ int init_credit_return(struct hfi1_devdata *dd); void free_credit_return(struct hfi1_devdata *dd); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 77e91f2..76d6a36 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -552,6 +552,30 @@ struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5) return sde; } +/* + * qp_to_send_context - map a qp to a send context + * @qp: the QP + * @sc5: the 5 bit sc + * + * Return: + * A send context for the qp + */ +struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) +{ + struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + + switch (qp->ibqp.qp_type) { + case IB_QPT_SMI: + /* SMA packets to VL15 */ + return dd->vld[15].sc; + default: + break; + } + + return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, + sc5); +} + struct qp_iter { struct hfi1_ibdev *dev; struct rvt_qp *qp; diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index afc2b4d..7b1c57e 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -109,6 +109,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth); void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag); struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5); +struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5); struct qp_iter; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 3141966..10b14da 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -783,18 +783,6 @@ static int pio_wait(struct rvt_qp *qp, return ret; } -struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) -{ - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); - struct hfi1_pportdata *ppd = dd->pport + (qp->port_num - 1); - u8 vl; - - vl = sc_to_vlt(dd, sc5); - if (vl >= ppd->vls_supported && vl != 15) - return NULL; - return dd->vld[vl].sc; -} - static void verbs_pio_complete(void *arg, int code) { struct rvt_qp *qp = (struct rvt_qp *)arg; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 8f1fde8..c736015b 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -478,8 +478,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); -struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5); - extern const enum ib_wc_opcode ib_hfi1_wc_opcode[]; extern const u8 hdr_len_by_opcode[]; -- cgit v0.10.2 From 8a4d3444ebfacceb3e1bf4e449a42f9b6345596c Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:46:01 -0800 Subject: staging/rdma/hfi1: Determine actual operational VLs Use shared credits and dedicated credits for each VL to determine the actual number of operational VLs. 
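The rule this patch applies can be stated compactly: a data VL counts as operational when the new buffer control table gives it either dedicated or shared credits. A small sketch of that counting rule, using a simplified stand-in for the driver's struct buffer_control (field widths and byte-order handling omitted):

/* Count the VLs that were granted any credits at all. */
struct vl_credits {
	unsigned short dedicated;
	unsigned short shared;
};

static int count_operational_vls(const struct vl_credits *vl, int num_data_vls)
{
	int i, count = 0;

	for (i = 0; i < num_data_vls; i++)
		if (vl[i].dedicated > 0 || vl[i].shared > 0)
			count++;
	return count;
}

The resulting count is then passed to sdma_map_init() and pio_map_init() in set_buffer_control() below, so the engine and send-context maps track the VLs that can actually carry traffic.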
Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 36e8e3e..b169e89 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -10305,12 +10305,6 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val) ppd->vls_operational = val; if (!ppd->port) ret = -EINVAL; - else - ret = sdma_map_init( - ppd->dd, - ppd->port - 1, - val, - NULL); } break; /* @@ -10721,12 +10715,15 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask, * raise = if the new limit is higher than the current value (may be changed * earlier in the algorithm), set the new limit to the new value */ -int set_buffer_control(struct hfi1_devdata *dd, struct buffer_control *new_bc) +int set_buffer_control(struct hfi1_pportdata *ppd, + struct buffer_control *new_bc) { + struct hfi1_devdata *dd = ppd->dd; u64 changing_mask, ld_mask, stat_mask; int change_count; int i, use_all_mask; int this_shared_changing; + int vl_count = 0, ret; /* * A0: add the variable any_shared_limit_changing below and in the * algorithm above. If removing A0 support, it can be removed. @@ -10878,6 +10875,28 @@ int set_buffer_control(struct hfi1_devdata *dd, struct buffer_control *new_bc) /* bracket the credit change with a total adjustment */ if (new_total < cur_total) set_global_limit(dd, new_total); + + /* + * Determine the actual number of operational VLS using the number of + * dedicated and shared credits for each VL. + */ + if (change_count > 0) { + for (i = 0; i < TXE_NUM_DATA_VL; i++) + if (be16_to_cpu(new_bc->vl[i].dedicated) > 0 || + be16_to_cpu(new_bc->vl[i].shared) > 0) + vl_count++; + ppd->actual_vls_operational = vl_count; + ret = sdma_map_init(dd, ppd->port - 1, vl_count ? + ppd->actual_vls_operational : + ppd->vls_operational, + NULL); + if (ret == 0) + ret = pio_map_init(dd, ppd->port - 1, vl_count ? + ppd->actual_vls_operational : + ppd->vls_operational, NULL); + if (ret) + return ret; + } return 0; } @@ -10969,7 +10988,7 @@ int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t) VL_ARB_LOW_PRIO_TABLE_SIZE, t); break; case FM_TBL_BUFFER_CONTROL: - ret = set_buffer_control(ppd->dd, t); + ret = set_buffer_control(ppd, t); break; case FM_TBL_SC2VLNT: set_sc2vlnt(ppd->dd, t); @@ -13990,6 +14009,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, } ppd->vls_supported = num_vls; ppd->vls_operational = ppd->vls_supported; + ppd->actual_vls_operational = ppd->vls_supported; /* Set the default MTU. 
*/ for (vl = 0; vl < num_vls; vl++) dd->vld[vl].mtu = hfi1_max_mtu; @@ -14074,6 +14094,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, num_vls, dd->chip_sdma_engines); num_vls = dd->chip_sdma_engines; ppd->vls_supported = dd->chip_sdma_engines; + ppd->vls_operational = ppd->vls_supported; } /* diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index 9523dc1..b8faee0 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -1017,7 +1017,7 @@ static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd, t.vl[i].dedicated = be_per_vl_credits; t.vl[15].dedicated = cpu_to_be16(vl15_credits); - return set_buffer_control(ppd->dd, &t); + return set_buffer_control(ppd, &t); err_exit: snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d", diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 4d5a18e..e8c4e56 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -644,6 +644,7 @@ struct hfi1_pportdata { u16 link_speed_active; u8 vls_supported; u8 vls_operational; + u8 actual_vls_operational; /* LID mask control */ u8 lmc; /* Rx Polarity inversion (compensate for ~tx on partner) */ @@ -1522,7 +1523,7 @@ int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); -int set_buffer_control(struct hfi1_devdata *dd, struct buffer_control *bc); +int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc); static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd) { -- cgit v0.10.2 From 77e7639fd782f5432c87ed7143b3e50be76c8500 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 12:46:19 -0800 Subject: staging/rdma/hfi1: Add send context sw index Print the qp's send context sw index in the qpstats Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 76d6a36..cc00eca 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -681,7 +681,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p\n", + "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -712,7 +712,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_rnr_retry_cnt, sde, sde ? sde->this_idx : 0, - send_context); + send_context, + send_context ? 
send_context->sw_index : 0); } void qp_comm_est(struct rvt_qp *qp) -- cgit v0.10.2 From 0358a440c2e7401238372316565b654fd95e5142 Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Sun, 14 Feb 2016 12:46:28 -0800 Subject: staging/rdma/hfi1: add cq head and tail information to qpstats This enables debugging of issues related to the cq event signalling mechanism. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Vennila Megavannan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index cc00eca..df90579 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -681,7 +681,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u\n", + "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u CQ %u %u\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -713,7 +713,9 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) sde, sde ? sde->this_idx : 0, send_context, - send_context ? send_context->sw_index : 0); + send_context ? send_context->sw_index : 0, + ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, + ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail); } void qp_comm_est(struct rvt_qp *qp) -- cgit v0.10.2 From 8638b77f13d2b11a4e356916526d6303e1002fe9 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:19:24 -0800 Subject: staging/rdma/hfi1: Add spaces around binary operators Add spaces around binary operators.
Fixes checkpatch check: CHECK: spaces preferred around that 'x' where x is a binary operator Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index b169e89..79c215e 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -422,10 +422,10 @@ static struct flag_table pio_err_status_flags[] = { SEC_SPC_FREEZE, SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK), /*23*/ FLAG_ENTRY("PioWriteQwValidParity", - SEC_WRITE_DROPPED|SEC_SPC_FREEZE, + SEC_WRITE_DROPPED | SEC_SPC_FREEZE, SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK), /*24*/ FLAG_ENTRY("PioBlockQwCountParity", - SEC_WRITE_DROPPED|SEC_SPC_FREEZE, + SEC_WRITE_DROPPED | SEC_SPC_FREEZE, SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK), /*25*/ FLAG_ENTRY("PioVlfVlLenParity", SEC_SPC_FREEZE, @@ -1196,7 +1196,7 @@ CNTR_ELEM(#name, \ #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx #define OVR_ELM(ctx) \ CNTR_ELEM("RcvHdrOvr" #ctx, \ - (RCV_HDR_OVFL_CNT + ctx*0x100), \ + (RCV_HDR_OVFL_CNT + ctx * 0x100), \ 0, CNTR_NORMAL, port_access_u64_csr) /* 32bit TXE */ @@ -5259,7 +5259,7 @@ static char *is_various_name(char *buf, size_t bsize, unsigned int source) if (source < ARRAY_SIZE(various_names)) strncpy(buf, various_names[source], bsize); else - snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START); + snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START); return buf; } @@ -6318,7 +6318,7 @@ void reset_link_credits(struct hfi1_devdata *dd) /* remove all previous VL credit limits */ for (i = 0; i < TXE_NUM_DATA_VL; i++) - write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0); + write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0); write_csr(dd, SEND_CM_CREDIT_VL15, 0); write_global_credit(dd, 0, 0, 0); /* reset the CM block */ @@ -7573,7 +7573,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) /* if the link is already going down or disabled, do not * queue another */ if ((ppd->host_link_state - & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN)) + & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || ppd->link_enabled == 0) { dd_dev_info(dd, "%s: not queuing link down\n", __func__); @@ -7991,7 +7991,7 @@ static irqreturn_t general_interrupt(int irq, void *data) /* phase 2: call the appropriate handler */ for_each_set_bit(bit, (unsigned long *)®s[0], - CCE_NUM_INT_CSRS*64) { + CCE_NUM_INT_CSRS * 64) { is_interrupt(dd, bit); } @@ -8014,12 +8014,12 @@ static irqreturn_t sdma_interrupt(int irq, void *data) /* This read_csr is really bad in the hot path */ status = read_csr(dd, - CCE_INT_STATUS + (8*(IS_SDMA_START/64))) + CCE_INT_STATUS + (8 * (IS_SDMA_START / 64))) & sde->imask; if (likely(status)) { /* clear the interrupt(s) */ write_csr(dd, - CCE_INT_CLEAR + (8*(IS_SDMA_START/64)), + CCE_INT_CLEAR + (8 * (IS_SDMA_START / 64)), status); /* handle the interrupt(s) */ @@ -8944,10 +8944,10 @@ static u16 opa_to_vc_link_widths(u16 opa_widths) u16 from; u16 to; } opa_link_xlate[] = { - { OPA_LINK_WIDTH_1X, 1 << (1-1) }, - { OPA_LINK_WIDTH_2X, 1 << (2-1) }, - { OPA_LINK_WIDTH_3X, 1 << (3-1) }, - { OPA_LINK_WIDTH_4X, 1 << (4-1) }, + { OPA_LINK_WIDTH_1X, 1 << (1 - 1) }, + { OPA_LINK_WIDTH_2X, 1 << (2 - 1) }, + { OPA_LINK_WIDTH_3X, 1 << (3 - 1) }, + { OPA_LINK_WIDTH_4X, 1 << (4 - 1) }, }; for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) { @@ -9725,7 +9725,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) c1 &= 
~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK); c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) - << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)| + << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) | ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK) << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT); write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1); @@ -10290,7 +10290,7 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val) * The VL Arbitrator high limit is sent in units of 4k * bytes, while HFI stores it in units of 64 bytes. */ - val *= 4096/64; + val *= 4096 / 64; reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK) << SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT; write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg); @@ -10507,7 +10507,7 @@ static int get_buffer_control(struct hfi1_devdata *dd, /* OPA and HFI have a 1-1 mapping */ for (i = 0; i < TXE_NUM_DATA_VL; i++) - read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]); + read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8 * i), &bc->vl[i]); /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */ read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]); @@ -11207,16 +11207,16 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd) static u32 encoded_size(u32 size) { switch (size) { - case 4*1024: return 0x1; - case 8*1024: return 0x2; - case 16*1024: return 0x3; - case 32*1024: return 0x4; - case 64*1024: return 0x5; - case 128*1024: return 0x6; - case 256*1024: return 0x7; - case 512*1024: return 0x8; - case 1*1024*1024: return 0x9; - case 2*1024*1024: return 0xa; + case 4 * 1024: return 0x1; + case 8 * 1024: return 0x2; + case 16 * 1024: return 0x3; + case 32 * 1024: return 0x4; + case 64 * 1024: return 0x5; + case 128 * 1024: return 0x6; + case 256 * 1024: return 0x7; + case 512 * 1024: return 0x8; + case 1 * 1024 * 1024: return 0x9; + case 2 * 1024 * 1024: return 0xa; } return 0x1; /* if invalid, go with the minimum size */ } @@ -12324,12 +12324,12 @@ void set_intr_state(struct hfi1_devdata *dd, u32 enable) if (enable) { /* enable all interrupts */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) - write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0); + write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0); init_qsfp_int(dd); } else { for (i = 0; i < CCE_NUM_INT_CSRS; i++) - write_csr(dd, CCE_INT_MASK + (8*i), 0ull); + write_csr(dd, CCE_INT_MASK + (8 * i), 0ull); } } @@ -12341,7 +12341,7 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) int i; for (i = 0; i < CCE_NUM_INT_CSRS; i++) - write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0); + write_csr(dd, CCE_INT_CLEAR + (8 * i), ~(u64)0); write_csr(dd, CCE_ERR_CLEAR, ~(u64)0); write_csr(dd, MISC_ERR_CLEAR, ~(u64)0); @@ -12421,10 +12421,10 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) /* direct the chip source to the given MSI-X interrupt */ m = isrc / 8; n = isrc % 8; - reg = read_csr(dd, CCE_INT_MAP + (8*m)); - reg &= ~((u64)0xff << (8*n)); - reg |= ((u64)msix_intr & 0xff) << (8*n); - write_csr(dd, CCE_INT_MAP + (8*m), reg); + reg = read_csr(dd, CCE_INT_MAP + (8 * m)); + reg &= ~((u64)0xff << (8 * n)); + reg |= ((u64)msix_intr & 0xff) << (8 * n); + write_csr(dd, CCE_INT_MAP + (8 * m), reg); } static void remap_sdma_interrupts(struct hfi1_devdata *dd, @@ -12437,11 +12437,11 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd, * SDMAProgress * SDMAIdle */ - remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine, + remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine, msix_intr); - remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES 
+ engine, + remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine, msix_intr); - remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine, + remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine, msix_intr); } @@ -12520,9 +12520,9 @@ static int request_msix_irqs(struct hfi1_devdata *dd) * Set the interrupt register and mask for this * context's interrupt. */ - rcd->ireg = (IS_RCVAVAIL_START+idx) / 64; + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; rcd->imask = ((u64)1) << - ((IS_RCVAVAIL_START+idx) % 64); + ((IS_RCVAVAIL_START + idx) % 64); handler = receive_context_interrupt; thread = receive_context_thread; arg = rcd; @@ -12542,7 +12542,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) if (arg == NULL) continue; /* make sure the name is terminated */ - me->name[sizeof(me->name)-1] = 0; + me->name[sizeof(me->name) - 1] = 0; ret = request_threaded_irq(me->msix.vector, handler, thread, 0, me->name, arg); @@ -12581,7 +12581,7 @@ static void reset_interrupts(struct hfi1_devdata *dd) /* all chip interrupts map to MSI-X 0 */ for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++) - write_csr(dd, CCE_INT_MAP + (8*i), 0); + write_csr(dd, CCE_INT_MAP + (8 * i), 0); } static int set_up_interrupts(struct hfi1_devdata *dd) @@ -12831,7 +12831,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* CceIntMap */ for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++) - write_csr(dd, CCE_INT_MAP+(8*i), 0); + write_csr(dd, CCE_INT_MAP + (8 * i), 0); /* SendCtxtCreditReturnAddr */ for (i = 0; i < dd->chip_send_contexts; i++) @@ -12849,12 +12849,12 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0); write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0); for (j = 0; j < RXE_NUM_TID_FLOWS; j++) - write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0); + write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j), 0); } /* RcvArray */ for (i = 0; i < dd->chip_rcv_array_count; i++) - write_csr(dd, RCV_ARRAY + (8*i), + write_csr(dd, RCV_ARRAY + (8 * i), RCV_ARRAY_RT_WRITE_ENABLE_SMASK); /* RcvQPMapTable */ @@ -13092,15 +13092,15 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) write_csr(dd, SEND_ERR_CLEAR, ~0ull); /* SEND_ERR_FORCE read-only */ for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++) - write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0); + write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0); for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++) - write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0); - for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++) - write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0); + write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0); + for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++) + write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0); for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++) - write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0); + write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0); for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++) - write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0); + write_csr(dd, SEND_COUNTER_ARRAY64 + (8 * i), 0); write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR); write_csr(dd, SEND_CM_GLOBAL_CREDIT, SEND_CM_GLOBAL_CREDIT_RESETCSR); @@ -13111,7 +13111,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0); write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0); for (i = 0; i < TXE_NUM_DATA_VL; i++) - write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0); + write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0); write_csr(dd, 
SEND_CM_CREDIT_VL15, 0); /* SEND_CM_CREDIT_USED_VL read-only */ /* SEND_CM_CREDIT_USED_VL15 read-only */ @@ -13403,7 +13403,7 @@ static void init_chip(struct hfi1_devdata *dd) write_csr(dd, RCV_CTXT_CTRL, 0); /* mask all interrupt sources */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) - write_csr(dd, CCE_INT_MASK + (8*i), 0ull); + write_csr(dd, CCE_INT_MASK + (8 * i), 0ull); /* * DC Reset: do a full DC reset before the register clear. @@ -14404,7 +14404,7 @@ static void handle_temp_err(struct hfi1_devdata *dd) dd_dev_emerg(dd, "Critical temperature reached! Forcing device into freeze mode!\n"); dd->flags |= HFI1_FORCED_FREEZE; - start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT); + start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT); /* * Shut DC down as much and as quickly as possible. * diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index d6dc339..0ee7217 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -750,7 +750,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) ppd, &port_cntr_ops[i].ops, port_cntr_ops[i].ops.write == NULL ? - S_IRUGO : S_IRUGO|S_IWUSR); + S_IRUGO : S_IRUGO | S_IWUSR); } } diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 5d012fe..dd5187f 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -702,7 +702,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) */ prefetch_range(packet->ebuf, packet->tlen - ((packet->rcd->rcvhdrqentsize - - (rhf_hdrq_offset(packet->rhf)+2)) * 4)); + (rhf_hdrq_offset(packet->rhf) + 2)) * 4)); } /* diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c index 29958aa..9a0ddd7 100644 --- a/drivers/staging/rdma/hfi1/eprom.c +++ b/drivers/staging/rdma/hfi1/eprom.c @@ -99,7 +99,7 @@ /* sleep length while waiting for controller */ #define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */ -#define COUNT_DELAY_SEC(n) ((n) * (1000000/WAIT_SLEEP_US)) +#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US)) /* GPIO pins */ #define EPROM_WP_N (1ull << 14) /* EPROM write line */ @@ -254,7 +254,7 @@ static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) int i; write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); - for (i = 0; i < EP_PAGE_SIZE/sizeof(u32); i++) + for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++) result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ } @@ -265,7 +265,7 @@ static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) { u32 offset; - u32 buffer[EP_PAGE_SIZE/sizeof(u32)]; + u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; int ret = 0; /* reject anything not on an EPROM page boundary */ @@ -296,7 +296,7 @@ static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data) write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); write_csr(dd, ASIC_EEP_DATA, data[0]); write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset)); - for (i = 1; i < EP_PAGE_SIZE/sizeof(u32); i++) + for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++) write_csr(dd, ASIC_EEP_DATA, data[i]); /* will close the open page */ return wait_for_not_busy(dd); @@ -308,7 +308,7 @@ static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data) static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) { u32 offset; - u32 buffer[EP_PAGE_SIZE/sizeof(u32)]; + u32 
buffer[EP_PAGE_SIZE / sizeof(u32)]; int ret = 0; /* reject anything not on an EPROM page boundary */ diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 35084b7..f87460d 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -393,17 +393,17 @@ static int verify_css_header(struct hfi1_devdata *dd, struct css_header *css) /* verify CSS header fields (most sizes are in DW, so add /4) */ if (invalid_header(dd, "module_type", css->module_type, CSS_MODULE_TYPE) || invalid_header(dd, "header_len", css->header_len, - (sizeof(struct firmware_file)/4)) + (sizeof(struct firmware_file) / 4)) || invalid_header(dd, "header_version", css->header_version, CSS_HEADER_VERSION) || invalid_header(dd, "module_vendor", css->module_vendor, CSS_MODULE_VENDOR) || invalid_header(dd, "key_size", - css->key_size, KEY_SIZE/4) + css->key_size, KEY_SIZE / 4) || invalid_header(dd, "modulus_size", - css->modulus_size, KEY_SIZE/4) + css->modulus_size, KEY_SIZE / 4) || invalid_header(dd, "exponent_size", - css->exponent_size, EXPONENT_SIZE/4)) { + css->exponent_size, EXPONENT_SIZE / 4)) { return -EINVAL; } return 0; @@ -488,7 +488,7 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name, ret = verify_css_header(dd, css); if (ret) { dd_dev_info(dd, "Invalid CSS header for \"%s\"\n", name); - } else if ((css->size*4) == fdet->fw->size) { + } else if ((css->size * 4) == fdet->fw->size) { /* non-augmented firmware file */ struct firmware_file *ff = (struct firmware_file *) fdet->fw->data; @@ -513,7 +513,7 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name, dd_dev_err(dd, "driver is unable to validate firmware without r2 and mu (not in firmware file)\n"); ret = -EINVAL; } - } else if ((css->size*4) + AUGMENT_SIZE == fdet->fw->size) { + } else if ((css->size * 4) + AUGMENT_SIZE == fdet->fw->size) { /* augmented firmware file */ struct augmented_firmware_file *aff = (struct augmented_firmware_file *)fdet->fw->data; @@ -536,7 +536,7 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name, /* css->size check failed */ dd_dev_err(dd, "invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n", - fdet->fw->size/4, (fdet->fw->size - AUGMENT_SIZE)/4, + fdet->fw->size / 4, (fdet->fw->size - AUGMENT_SIZE) / 4, css->size); ret = -EINVAL; @@ -780,7 +780,7 @@ static int retry_firmware(struct hfi1_devdata *dd, int load_result) static void write_rsa_data(struct hfi1_devdata *dd, int what, const u8 *data, int nbytes) { - int qw_size = nbytes/8; + int qw_size = nbytes / 8; int i; if (((unsigned long)data & 0x7) == 0) { @@ -788,14 +788,14 @@ static void write_rsa_data(struct hfi1_devdata *dd, int what, u64 *ptr = (u64 *)data; for (i = 0; i < qw_size; i++, ptr++) - write_csr(dd, what + (8*i), *ptr); + write_csr(dd, what + (8 * i), *ptr); } else { /* not aligned */ for (i = 0; i < qw_size; i++, data += 8) { u64 value; memcpy(&value, data, 8); - write_csr(dd, what + (8*i), value); + write_csr(dd, what + (8 * i), value); } } } @@ -808,7 +808,7 @@ static void write_streamed_rsa_data(struct hfi1_devdata *dd, int what, const u8 *data, int nbytes) { u64 *ptr = (u64 *)data; - int qw_size = nbytes/8; + int qw_size = nbytes / 8; for (; qw_size > 0; qw_size--, ptr++) write_csr(dd, what, *ptr); @@ -1743,8 +1743,8 @@ int get_platform_config_field(struct hfi1_devdata *dd, if (len < field_len_bits) return -EINVAL; - seek = field_start_bits/8; - wlen = field_len_bits/8; + seek = field_start_bits / 
8; + wlen = field_len_bits / 8; src_ptr = (u32 *)((u8 *)src_ptr + seek); @@ -1783,7 +1783,7 @@ int get_platform_config_field(struct hfi1_devdata *dd, if (!src_ptr || len < field_len_bits) return -EINVAL; - src_ptr += (field_start_bits/32); + src_ptr += (field_start_bits / 32); *data = (*src_ptr >> (field_start_bits % 32)) & ((1 << field_len_bits) - 1); diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index e8c4e56..70decdf4 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -718,7 +718,7 @@ struct hfi1_pportdata { /* begin congestion log related entries * cc_log_lock protects all congestion log related data */ spinlock_t cc_log_lock ____cacheline_aligned_in_smp; - u8 threshold_cong_event_map[OPA_MAX_SLS/8]; + u8 threshold_cong_event_map[OPA_MAX_SLS / 8]; u16 threshold_event_counter; struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS]; int cc_log_idx; /* index for logging events */ diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c index 9adab86..5e6d77d 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/staging/rdma/hfi1/intr.c @@ -166,7 +166,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) reset_link_credits(dd); /* freeze after a link down to guarantee a clean egress */ - start_freeze_handling(ppd, FREEZE_SELF|FREEZE_LINK_DOWN); + start_freeze_handling(ppd, FREEZE_SELF | FREEZE_LINK_DOWN); ev = IB_EVENT_PORT_ERR; diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index d9efe22..1a9eb50 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -534,7 +534,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ppd = dd->pport + (port - 1); ibp = &ppd->ibport_data; - if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) || + if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) || ppd->vls_supported > ARRAY_SIZE(dd->vld)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -600,13 +600,13 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, for (i = 0; i < ppd->vls_supported; i++) { mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU); if ((i % 2) == 0) - pi->neigh_mtu.pvlx_to_mtu[i/2] |= (mtu << 4); + pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4); else - pi->neigh_mtu.pvlx_to_mtu[i/2] |= mtu; + pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu; } /* don't forget VL 15 */ mtu = mtu_to_enum(dd->vld[15].mtu, 2048); - pi->neigh_mtu.pvlx_to_mtu[15/2] |= mtu; + pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu; pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL; pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS); pi->partenforce_filterraw |= @@ -744,7 +744,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - n_blocks_avail = (u16) (npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1; + n_blocks_avail = (u16) (npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1; size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16); @@ -1207,17 +1207,17 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT, ibp->rvp.vl_high_limit); - if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) || + if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) || ppd->vls_supported > ARRAY_SIZE(dd->vld)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr 
*)smp); } for (i = 0; i < ppd->vls_supported; i++) { if ((i % 2) == 0) - mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i/2] >> 4) + mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >> 4) & 0xF); else - mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i/2] & 0xF); + mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] & 0xF); if (mtu == 0xffff) { pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n", mtu, @@ -1236,7 +1236,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* As per OPAV1 spec: VL15 must support and be configured * for operation with a 2048 or larger MTU. */ - mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15/2] & 0xF); + mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF); if (mtu < 2048 || mtu == 0xffff) mtu = 2048; if (dd->vld[15].mtu != mtu) { @@ -1419,7 +1419,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - n_blocks_avail = (u16)(npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1; + n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1; if (start_block + n_blocks_sent > n_blocks_avail || n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) { @@ -3460,7 +3460,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, rcu_read_unlock(); if (resp_len) - *resp_len += sizeof(u16)*(IB_CCT_ENTRIES * n_blocks + 1); + *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); return reply((struct ib_mad_hdr *)smp); } diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/staging/rdma/hfi1/mad.h index f031775..b6c88be 100644 --- a/drivers/staging/rdma/hfi1/mad.h +++ b/drivers/staging/rdma/hfi1/mad.h @@ -267,7 +267,7 @@ struct opa_hfi1_cong_log { u8 congestion_flags; __be16 threshold_event_counter; __be32 current_time_stamp; - u8 threshold_cong_event_map[OPA_MAX_SLS/8]; + u8 threshold_cong_event_map[OPA_MAX_SLS / 8]; struct opa_hfi1_cong_log_event events[OPA_CONG_LOG_ELEMS]; } __packed; diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 04f2d8a3..019b4f8 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -840,7 +840,7 @@ static void write_gasket_interrupt(struct hfi1_devdata *dd, int index, { write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (index * 8), (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) - |((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT))); + | ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT))); } /* diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 69bbe22..9bafedf 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -101,7 +101,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op) /* Fall through */ case PSC_DATA_VL_ENABLE: /* Disallow sending on VLs not enabled */ - mask = (((~0ull)<halt_wait); } -#define BLOCK_DWORDS (PIO_BLOCK_SIZE/sizeof(u32)) +#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32)) #define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS) /* diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/staging/rdma/hfi1/pio_copy.c index ebb0baf..dc0c178 100644 --- a/drivers/staging/rdma/hfi1/pio_copy.c +++ b/drivers/staging/rdma/hfi1/pio_copy.c @@ -52,9 +52,9 @@ /* additive distance between non-SOP and SOP space */ #define SOP_DISTANCE (TXE_PIO_SIZE / 2) -#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE-1) +#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1) /* number of QUADWORDs in a block */ -#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE/sizeof(u64)) +#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / 
sizeof(u64)) /** * pio_copy - copy data block to MMIO space @@ -83,7 +83,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); /* calculate where the QWORD data ends - in SOP=1 space */ - dend = dest + ((count>>1) * sizeof(u64)); + dend = dest + ((count >> 1) * sizeof(u64)); if (dend < send) { /* all QWORD data is within the SOP block, does *not* @@ -177,7 +177,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, * "zero" shift - bit shift used to zero out upper bytes. Input is * the count of LSB bytes to preserve. */ -#define zshift(x) (8 * (8-(x))) +#define zshift(x) (8 * (8 - (x))) /* * "merge" shift - bit shift used to merge with carry bytes. Input is @@ -244,7 +244,7 @@ static inline void read_extra_bytes(struct pio_buf *pbuf, pbuf->carry.val64 |= (((*(u64 *)from) >> mshift(off)) << zshift(xbytes)) - >> zshift(xbytes+pbuf->carry_bytes); + >> zshift(xbytes + pbuf->carry_bytes); off = 0; pbuf->carry_bytes += xbytes; nbytes -= xbytes; @@ -411,7 +411,7 @@ static inline void merge_write8( jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder); writeq(pbuf->carry.val64, dest); - jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes); + jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes); } /* @@ -463,7 +463,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); /* calculate where the QWORD data ends - in SOP=1 space */ - dend = dest + ((nbytes>>3) * sizeof(u64)); + dend = dest + ((nbytes >> 3) * sizeof(u64)); if (dend < send) { /* all QWORD data is within the SOP block, does *not* @@ -645,7 +645,7 @@ static void mid_copy_straight(struct pio_buf *pbuf, void __iomem *dend; /* 8-byte data end */ /* calculate 8-byte data end */ - dend = dest + ((nbytes>>3) * sizeof(u64)); + dend = dest + ((nbytes >> 3) * sizeof(u64)); if (pbuf->qw_written < PIO_BLOCK_QWS) { /* @@ -713,7 +713,7 @@ static void mid_copy_straight(struct pio_buf *pbuf, /* we know carry_bytes was zero on entry to this routine */ read_low_bytes(pbuf, from, nbytes & 0x7); - pbuf->qw_written += nbytes>>3; + pbuf->qw_written += nbytes >> 3; } /* diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index 42e5be4..a6d55a6 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -339,7 +339,7 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) u8 *cache = &cp->cache[0]; /* ensure sane contents on invalid reads, for cable swaps */ - memset(cache, 0, (QSFP_MAX_NUM_PAGES*128)); + memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128)); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.cache_valid = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); @@ -420,7 +420,7 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) return 0; bail: - memset(cache, 0, (QSFP_MAX_NUM_PAGES*128)); + memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128)); return ret; } @@ -564,7 +564,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) memcpy(bin_buff, &cache[bidx], QSFP_DUMP_CHUNK); for (iidx = 0; iidx < QSFP_DUMP_CHUNK; ++iidx) { - sofar += scnprintf(buf + sofar, len-sofar, + sofar += scnprintf(buf + sofar, len - sofar, " %02X", bin_buff[iidx]); } sofar += scnprintf(buf + sofar, len - sofar, "\n"); diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h index af59a43..9f6e2f3 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/staging/rdma/hfi1/qsfp.h @@ -214,7 +214,7 @@ struct qsfp_data { /* 
Helps to find our way */ struct hfi1_pportdata *ppd; struct work_struct qsfp_work; - u8 cache[QSFP_MAX_NUM_PAGES*128]; + u8 cache[QSFP_MAX_NUM_PAGES * 128]; spinlock_t qsfp_lock; u8 check_interrupt_flags; u8 reset_needed; diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 443fda8..5cdf1d2 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -1992,7 +1992,7 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, spin_lock_irqsave(&ppd->cc_log_lock, flags); - ppd->threshold_cong_event_map[sl/8] |= 1 << (sl % 8); + ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8); ppd->threshold_event_counter++; cc_event = &ppd->cc_events[ppd->cc_log_idx++]; diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index e79f931..3953935 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -1020,7 +1020,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) dd->chip_sdma_mem_size); per_sdma_credits = - dd->chip_sdma_mem_size/(num_engines * SDMA_BLOCK_SIZE); + dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE); /* set up freeze waitqueue */ init_waitqueue_head(&dd->sdma_unfreeze_wq); @@ -1625,10 +1625,10 @@ static void sdma_setlengen(struct sdma_engine *sde) * generation counter. */ write_sde_csr(sde, SD(LEN_GEN), - (sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT) + (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT) ); write_sde_csr(sde, SD(LEN_GEN), - ((sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT)) + ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) | (4ULL << SD(LEN_GEN_GENERATION_SHIFT)) ); } @@ -3057,5 +3057,5 @@ void _sdma_engine_progress_schedule( trace_hfi1_sdma_engine_progress(sde, sde->progress_mask); /* assume we have selected a good cpu */ write_csr(sde->dd, - CCE_INT_FORCE + (8*(IS_SDMA_START/64)), sde->progress_mask); + CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)), sde->progress_mask); } diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index f24b5a1..c106d3c 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -682,7 +682,7 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd, dd->default_desc1; if (tx->flags & SDMA_TXREQ_F_URGENT) tx->descp[tx->num_desc].qw[1] |= - (SDMA_DESC1_HEAD_TO_HOST_FLAG| + (SDMA_DESC1_HEAD_TO_HOST_FLAG | SDMA_DESC1_INT_REQ_FLAG); } diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index dfa9ef2..9fe18b0 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -1204,7 +1204,7 @@ static int set_txreq_header(struct user_sdma_request *req, /* Set ACK request on last packet */ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) - hdr->bth[2] |= cpu_to_be32(1UL<<31); + hdr->bth[2] |= cpu_to_be32(1UL << 31); /* Set the new offset */ hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset); diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 10b14da..acf1132 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -874,7 +874,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords); } else { if (ss) { - seg_pio_copy_start(pbuf, pbc, hdr, hdrwords*4); + seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4); while (len) { void *addr = ss->sge.vaddr; u32 slen = ss->sge.length; -- cgit v0.10.2 From 74182acd7f6b5782d72bf608db233348d2120af0 Mon Sep 17 00:00:00 2001 
From: Jubin John Date: Sun, 14 Feb 2016 20:19:32 -0800 Subject: staging/rdma/hfi1: Remove multiple blank lines Remove multiple blank lines to fix checkpatch check: CHECK: Please don't use multiple blank lines Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 79c215e..0a593bd 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -986,7 +986,6 @@ static struct flag_table dc8051_info_host_msg_flags[] = { FLAG_ENTRY0("Link going down", 0x0100), }; - static u32 encoded_size(u32 size); static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate); static int set_physical_link_state(struct hfi1_devdata *dd, u64 state); @@ -1285,7 +1284,6 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, { u64 ret; - if (mode == CNTR_MODE_R) { ret = read_csr(dd, csr); } else if (mode == CNTR_MODE_W) { @@ -10748,7 +10746,6 @@ int set_buffer_control(struct hfi1_pportdata *ppd, #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15) #define NUM_USABLE_VLS 16 /* look at VL15 and less */ - /* find the new total credits, do sanity check on unused VLs */ for (i = 0; i < OPA_MAX_VLS; i++) { if (valid_vl(i)) { @@ -11891,7 +11888,6 @@ static int init_cntrs(struct hfi1_devdata *dd) if (!dd->scntrs) goto bail; - /* allocate space for the counter names */ dd->cntrnameslen = sz; dd->cntrnames = kmalloc(sz, GFP_KERNEL); @@ -12060,7 +12056,6 @@ bail: return -ENOMEM; } - static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) { switch (chip_lstate) { @@ -14282,7 +14277,6 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate, return (u16)delta_cycles; } - /** * create_pbc - build a pbc for transmission * @flags: special case flags or-ed in built pbc diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index 0ee7217..7cb4348 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -102,7 +102,6 @@ do { \ pr_warn("create of %s failed\n", name); \ } while (0) - #define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \ DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO) @@ -127,7 +126,6 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) return pos; } - static void _opcode_stats_seq_stop(struct seq_file *s, void *v) __releases(RCU) { @@ -308,7 +306,6 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos) return pos; } - static void _sdes_seq_stop(struct seq_file *s, void *v) __releases(RCU) { diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index cc681f7..b52cb78 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -162,7 +162,6 @@ enum mmap_types { #define dbg(fmt, ...) 
\ pr_info(fmt, ##__VA_ARGS__) - static inline int is_valid_mmap(u64 token) { return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC); @@ -1589,7 +1588,6 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence) return filp->f_pos; } - /* NOTE: assumes unsigned long is 8 bytes */ static ssize_t ui_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 70decdf4..347ceca 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -878,7 +878,6 @@ struct hfi1_devdata { wait_queue_head_t sdma_unfreeze_wq; atomic_t sdma_unfreeze_count; - /* hfi1_pportdata, points to array of (physical) port-specific * data structs, indexed by pidx (0..n-1) */ @@ -1598,7 +1597,6 @@ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) /* IB dword length mask in PBC (lower 11 bits); same for all chips */ #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) - /* ctxt_flag bit offsets */ /* context has been setup */ #define HFI1_CTXT_SETUP_DONE 1 diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 423c699..3071fbc 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1723,7 +1723,6 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size, rcd->egrbufs.size); - /* * Set the contexts rcv array head update threshold to the closest * power of 2 (so we can use a mask instead of modulo) below half diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c index 5e6d77d..9a9b331 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/staging/rdma/hfi1/intr.c @@ -179,7 +179,6 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) signal_ib_event(ppd, ev); } - } /* diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 1a9eb50..b687e3f 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -2310,7 +2310,6 @@ static void a0_portstatus(struct hfi1_pportdata *ppd, } } - static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, struct ib_device *ibdev, u8 port, u32 *resp_len) { diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/staging/rdma/hfi1/mad.h index b6c88be..f9e93c0 100644 --- a/drivers/staging/rdma/hfi1/mad.h +++ b/drivers/staging/rdma/hfi1/mad.h @@ -235,7 +235,6 @@ struct ib_pma_portcounters_cong { #define IB_CC_SVCTYPE_RD 0x2 #define IB_CC_SVCTYPE_UD 0x3 - /* * There should be an equivalent IB #define for the following, but * I cannot find it. diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 019b4f8..26eb610 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -447,7 +447,6 @@ void restore_pci_variables(struct hfi1_devdata *dd) pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); } - /* * BIOS may not set PCIe bus-utilization parameters for best performance. * Check and optionally adjust them to maximize our throughput. 
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 9bafedf..f1f30b3 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -852,7 +852,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->credit_ctrl, thresh); - return sc; } diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h index 1dedeb2..8d0cf1b 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/staging/rdma/hfi1/pio.h @@ -50,7 +50,6 @@ * */ - /* send context types */ #define SC_KERNEL 0 #define SC_ACK 1 @@ -318,7 +317,6 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd); void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl); void pio_send_control(struct hfi1_devdata *dd, int op); - /* PIO copy routines */ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 3953935..cc21272 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -609,7 +609,6 @@ static void sdma_sw_clean_up_task(unsigned long opaque) * descq are ours to play with. */ - /* * In the error clean up sequence, software clean must be called * before the hardware clean so we can use the hardware head in @@ -1690,7 +1689,6 @@ static void set_sdma_integrity(struct sdma_engine *sde) write_sde_csr(sde, SD(CHECK_ENABLE), reg); } - static void init_sdma_regs( struct sdma_engine *sde, u32 credits, diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index c106d3c..cc01e81 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -65,7 +65,6 @@ /* Hardware limit for SDMA packet size */ #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1) - #define SDMA_TXREQ_S_OK 0 #define SDMA_TXREQ_S_SENDERROR 1 #define SDMA_TXREQ_S_ABORTED 2 @@ -418,7 +417,6 @@ struct sdma_engine { struct list_head flushlist; }; - int sdma_init(struct hfi1_devdata *dd, u8 port); void sdma_start(struct hfi1_devdata *dd); void sdma_exit(struct hfi1_devdata *dd); @@ -464,7 +462,6 @@ static inline int __sdma_running(struct sdma_engine *engine) return engine->state.current_state == sdma_state_s99_running; } - /** * sdma_running() - state suitability test * @engine: sdma engine @@ -494,7 +491,6 @@ void _sdma_txreq_ahgadd( u32 *ahg, u8 ahg_hlen); - /** * sdma_txinit_ahg() - initialize an sdma_txreq struct with AHG * @tx: tx request to initialize diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index f1d47e7..1f3a747 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -53,7 +53,6 @@ #include "mad.h" #include "trace.h" - /* * Start of per-port congestion control structures and support code */ @@ -254,7 +253,6 @@ HFI1_SC2VL_ATTR(29); HFI1_SC2VL_ATTR(30); HFI1_SC2VL_ATTR(31); - static struct attribute *sc2vl_default_attributes[] = { &hfi1_sc2vl_attr_0.attr, &hfi1_sc2vl_attr_1.attr, @@ -360,7 +358,6 @@ HFI1_SL2SC_ATTR(29); HFI1_SL2SC_ATTR(30); HFI1_SL2SC_ATTR(31); - static struct attribute *sl2sc_default_attributes[] = { &hfi1_sl2sc_attr_0.attr, &hfi1_sl2sc_attr_1.attr, @@ -493,7 +490,6 @@ static struct kobj_type hfi1_vl2mtu_ktype = { .default_attrs = vl2mtu_default_attributes }; - /* end of per-port file structures and support code */ /* @@ -535,7 +531,6 @@ static ssize_t show_boardversion(struct device *device, return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); } - static ssize_t show_nctxts(struct device 
*device, struct device_attribute *attr, char *buf) { @@ -702,7 +697,6 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, } kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD); - ret = kobject_init_and_add(&ppd->pport_cc_kobj, &port_cc_ktype, kobj, "CCMgtA"); if (ret) { diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index fcae96e..a13215f 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -380,7 +380,6 @@ const char *parse_sdma_flags( #define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1) - #define lrh_name(lrh) { HFI1_##lrh, #lrh } #define show_lnh(lrh) \ __print_symbolic(lrh, \ @@ -427,7 +426,6 @@ __print_symbolic(opcode, \ ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ ib_opcode_name(CNP)) - #define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" #define BTH_PRN \ "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ @@ -563,7 +561,6 @@ DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr, #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_snoop - TRACE_EVENT(snoop_capture, TP_PROTO(struct hfi1_devdata *dd, int hdr_len, @@ -760,7 +757,6 @@ DECLARE_EVENT_CLASS(hfi1_bct_template, ) ); - DEFINE_EVENT(hfi1_bct_template, bct_set, TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), TP_ARGS(dd, bc)); diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/staging/rdma/hfi1/twsi.h index 6cb30e5..0722ac8 100644 --- a/drivers/staging/rdma/hfi1/twsi.h +++ b/drivers/staging/rdma/hfi1/twsi.h @@ -65,5 +65,4 @@ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr, const void *buffer, int len); - #endif /* _TWSI_H */ diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index da4e465..5779f3a 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -647,7 +647,6 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, return 0; } - /** * hfi1_ud_rcv - receive an incoming UD packet * @ibp: the port the packet came in on diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index acf1132..466055b 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -376,7 +376,6 @@ dropit: return 0; } - /** * hfi1_ib_rcv - process an incoming packet * @packet: data packet information @@ -965,7 +964,6 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd, if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) goto bad; - /* Is the pkey = 0x0, or 0x8000? 
*/ if ((pkey & PKEY_LOW_15_MASK) == 0) goto bad; -- cgit v0.10.2 From 50e5dcbed6b36212c40e8fee18a7f5c7bb0aca13 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:19:41 -0800 Subject: staging/rdma/hfi1: Remove space after cast Remove the space after a cast to fix checkpatch check: CHECK: No space is necessary after a cast Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 0a593bd..cf57865 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -5436,7 +5436,7 @@ static void update_rcverr_timer(unsigned long opaque) OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); } - dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt; + dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt; mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME); } @@ -6366,7 +6366,7 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) reg | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT)); - (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */ + (void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */ if (!abort) { udelay(1); /* must hold for the longer of 16cclks or 20ns */ write_csr(dd, DCC_CFG_RESET, reg); @@ -13407,7 +13407,7 @@ static void init_chip(struct hfi1_devdata *dd) * across the clear. */ write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK); - (void) read_csr(dd, CCE_DC_CTRL); + (void)read_csr(dd, CCE_DC_CTRL); if (use_flr) { /* diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index 7cb4348..f309c5f 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -149,8 +149,8 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) if (!n_packets && !n_bytes) return SEQ_SKIP; seq_printf(s, "%02llx %llu/%llu\n", i, - (unsigned long long) n_packets, - (unsigned long long) n_bytes); + (unsigned long long)n_packets, + (unsigned long long)n_bytes); return 0; } diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/staging/rdma/hfi1/dma.c index e03bd73..afe572d 100644 --- a/drivers/staging/rdma/hfi1/dma.c +++ b/drivers/staging/rdma/hfi1/dma.c @@ -52,7 +52,7 @@ #include "verbs.h" -#define BAD_DMA_ADDRESS ((u64) 0) +#define BAD_DMA_ADDRESS ((u64)0) /* * The following functions implement driver specific replacements @@ -74,7 +74,7 @@ static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr, if (WARN_ON(!valid_dma_direction(direction))) return BAD_DMA_ADDRESS; - return (u64) cpu_addr; + return (u64)cpu_addr; } static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, @@ -95,7 +95,7 @@ static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page, if (offset + size > PAGE_SIZE) return BAD_DMA_ADDRESS; - addr = (u64) page_address(page); + addr = (u64)page_address(page); if (addr) addr += offset; @@ -120,7 +120,7 @@ static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl, return BAD_DMA_ADDRESS; for_each_sg(sgl, sg, nents, i) { - addr = (u64) page_address(sg_page(sg)); + addr = (u64)page_address(sg_page(sg)); if (!addr) { ret = 0; break; @@ -161,14 +161,14 @@ static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size, if (p) addr = page_address(p); if (dma_handle) - *dma_handle = (u64) addr; + *dma_handle = (u64)addr; return addr; } static void 
hfi1_dma_free_coherent(struct ib_device *dev, size_t size, void *cpu_addr, u64 dma_handle) { - free_pages((unsigned long) cpu_addr, get_order(size)); + free_pages((unsigned long)cpu_addr, get_order(size)); } struct ib_dma_mapping_ops hfi1_dma_mapping_ops = { diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index dd5187f..6082935 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -594,7 +594,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) while (1) { struct hfi1_devdata *dd = rcd->dd; struct hfi1_ibport *ibp = &rcd->ppd->ibport_data; - __le32 *rhf_addr = (__le32 *) rcd->rcvhdrq + mdata.ps_head + + __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + dd->rhf_offset; struct rvt_qp *qp; struct hfi1_ib_header *hdr; @@ -730,7 +730,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) } } - packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff + + packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + packet->rcd->dd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); @@ -969,7 +969,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) /* On to the next packet */ packet.rhqoff += packet.rsize; - packet.rhf_addr = (__le32 *) rcd->rcvhdrq + + packet.rhf_addr = (__le32 *)rcd->rcvhdrq + packet.rhqoff + dd->rhf_offset; packet.rhf = rhf_to_cpu(packet.rhf_addr); diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index f87460d..31550a3 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -1838,7 +1838,7 @@ void read_guid(struct hfi1_devdata *dd) { /* Take the DC out of reset to get a valid GUID value */ write_csr(dd, CCE_DC_CTRL, 0); - (void) read_csr(dd, CCE_DC_CTRL); + (void)read_csr(dd, CCE_DC_CTRL); dd->base_guid = read_csr(dd, DC_DC8051_CFG_LOCAL_GUID); dd_dev_info(dd, "GUID %llx", diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 347ceca..de82f8e 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1668,7 +1668,7 @@ void hfi1_release_user_pages(struct page **, size_t, bool); static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd) { - *((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL; + *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL; } static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) @@ -1677,7 +1677,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) * volatile because it's a DMA target from the chip, routine is * inlined, and don't want register caching or reordering. 
*/ - return (u32) le64_to_cpu(*rcd->rcvhdrtail_kvaddr); + return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr); } /* diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index b687e3f..a56d7dc 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -744,7 +744,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - n_blocks_avail = (u16) (npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1; + n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1; size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16); @@ -758,7 +758,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - p = (__be16 *) data; + p = (__be16 *)data; q = (u16 *)data; /* get the real pkeys if we are requesting the first block */ if (start_block == 0) { @@ -1406,7 +1406,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_sent = OPA_AM_NBLK(am); u32 start_block = am & 0x7ff; - u16 *p = (u16 *) data; + u16 *p = (u16 *)data; __be16 *q = (__be16 *)data; int i; u16 n_blocks_avail; @@ -1586,7 +1586,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, { u32 n_blocks = OPA_AM_NBLK(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); - void *vp = (void *) data; + void *vp = (void *)data; size_t size = 4 * sizeof(u64); if (n_blocks != 1) { @@ -1609,7 +1609,7 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, u32 n_blocks = OPA_AM_NBLK(am); int async_update = OPA_AM_ASYNC(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); - void *vp = (void *) data; + void *vp = (void *)data; struct hfi1_pportdata *ppd; int lstate; @@ -1641,7 +1641,7 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; - void *vp = (void *) data; + void *vp = (void *)data; int size; if (n_blocks != 1) { @@ -1666,7 +1666,7 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; - void *vp = (void *) data; + void *vp = (void *)data; int lstate; if (n_blocks != 1) { @@ -1699,7 +1699,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, u32 lstate; struct hfi1_ibport *ibp; struct hfi1_pportdata *ppd; - struct opa_port_state_info *psi = (struct opa_port_state_info *) data; + struct opa_port_state_info *psi = (struct opa_port_state_info *)data; if (nports != 1) { smp->status |= IB_SMP_INVALID_FIELD; @@ -1748,7 +1748,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, u8 ls_new, ps_new; struct hfi1_ibport *ibp; struct hfi1_pportdata *ppd; - struct opa_port_state_info *psi = (struct opa_port_state_info *) data; + struct opa_port_state_info *psi = (struct opa_port_state_info *)data; int ret, invalid = 0; if (nports != 1) { @@ -1834,7 +1834,7 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; - struct buffer_control *p = (struct buffer_control *) data; + struct buffer_control *p = (struct buffer_control *)data; int size; if (num_ports != 1) { @@ -1857,7 +1857,7 @@ static int __subn_set_opa_bct(struct opa_smp 
*smp, u32 am, u8 *data, u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; - struct buffer_control *p = (struct buffer_control *) data; + struct buffer_control *p = (struct buffer_control *)data; if (num_ports != 1) { smp->status |= IB_SMP_INVALID_FIELD; @@ -1930,10 +1930,10 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, switch (section) { case OPA_VLARB_LOW_ELEMENTS: - (void) fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p); + (void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p); break; case OPA_VLARB_HIGH_ELEMENTS: - (void) fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p); + (void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p); break; /* neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX * can be changed from the default values */ @@ -2522,7 +2522,7 @@ static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp, idx_from_vl(vl)); if (tmp < sum_vl_xmit_wait) { /* we wrapped */ - sum_vl_xmit_wait = (u64) ~0; + sum_vl_xmit_wait = (u64)~0; break; } sum_vl_xmit_wait = tmp; @@ -3287,7 +3287,7 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, { int i; struct opa_congestion_setting_attr *p = - (struct opa_congestion_setting_attr *) data; + (struct opa_congestion_setting_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct opa_congestion_setting_entry_shadow *entries; @@ -3326,7 +3326,7 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct opa_congestion_setting_attr *p = - (struct opa_congestion_setting_attr *) data; + (struct opa_congestion_setting_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct opa_congestion_setting_entry_shadow *entries; @@ -3418,7 +3418,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct ib_cc_table_attr *cc_table_attr = - (struct ib_cc_table_attr *) data; + (struct ib_cc_table_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 start_block = OPA_AM_START_BLK(am); @@ -3475,7 +3475,7 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) { - struct ib_cc_table_attr *p = (struct ib_cc_table_attr *) data; + struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 start_block = OPA_AM_START_BLK(am); @@ -3559,7 +3559,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); - struct opa_led_info *p = (struct opa_led_info *) data; + struct opa_led_info *p = (struct opa_led_info *)data; u32 nport = OPA_AM_NPORT(am); u64 reg; @@ -3584,7 +3584,7 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); - struct opa_led_info *p = (struct opa_led_info *) data; + struct opa_led_info *p = (struct opa_led_info *)data; u32 nport = OPA_AM_NPORT(am); int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK); @@ -3800,7 +3800,7 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, /* zero the payload for this segment */ memset(next_smp + sizeof(*agg), 0, agg_data_len); - (void) subn_get_opa_sma(agg->attr_id, smp, am, agg->data, + 
(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data, ibdev, port, NULL); if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); @@ -3844,7 +3844,7 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, return reply((struct ib_mad_hdr *)smp); } - (void) subn_set_opa_sma(agg->attr_id, smp, am, agg->data, + (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data, ibdev, port, NULL); if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); @@ -3989,7 +3989,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, smp->method == IB_MGMT_METHOD_SET) && port_num && port_num <= ibdev->phys_port_cnt && port != port_num) - (void) check_mkey(to_iport(ibdev, port_num), + (void)check_mkey(to_iport(ibdev, port_num), (struct ib_mad_hdr *)smp, 0, smp->mkey, smp->route.dr.dr_slid, smp->route.dr.return_path, @@ -4079,7 +4079,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, smp->method == IB_MGMT_METHOD_SET) && port_num && port_num <= ibdev->phys_port_cnt && port != port_num) - (void) check_mkey(to_iport(ibdev, port_num), + (void)check_mkey(to_iport(ibdev, port_num), (struct ib_mad_hdr *)smp, 0, smp->mkey, (__force __be32)smp->dr_slid, diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 26eb610..0368516 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -233,7 +233,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev, */ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) { - u64 __iomem *base = (void __iomem *) dd->kregbase; + u64 __iomem *base = (void __iomem *)dd->kregbase; dd->flags &= ~HFI1_PRESENT; dd->kregbase = NULL; @@ -1188,7 +1188,7 @@ retry: /* step 5h: arm gasket logic */ /* hold DC in reset across the SBR */ write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK); - (void) read_csr(dd, CCE_DC_CTRL); /* DC reset hold */ + (void)read_csr(dd, CCE_DC_CTRL); /* DC reset hold */ /* save firmware control across the SBR */ fw_ctrl = read_csr(dd, MISC_CFG_FW_CTRL); diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index f1f30b3..3817731 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -130,7 +130,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op) if (write) { write_csr(dd, SEND_CTRL, reg); if (flush) - (void) read_csr(dd, SEND_CTRL); /* flush write */ + (void)read_csr(dd, SEND_CTRL); /* flush write */ } spin_unlock_irqrestore(&dd->sendctrl_lock, flags); diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 5cdf1d2..24f2b65 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -1610,7 +1610,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { __be32 *p = ohdr->u.at.atomic_ack_eth; - val = ((u64) be32_to_cpu(p[0]) << 32) | + val = ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]); } else val = 0; @@ -1708,7 +1708,7 @@ read_last: aeth = be32_to_cpu(ohdr->u.aeth); hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0); WARN_ON(qp->s_rdma_read_sge.num_sge); - (void) do_rc_ack(qp, aeth, psn, + (void)do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST), 0, rcd); goto ack_done; } @@ -1906,7 +1906,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, * or the send tasklet is already backed up to send an * earlier entry, we can ignore this request. 
*/ - if (!e || e->opcode != (u8) opcode || old_req) + if (!e || e->opcode != (u8)opcode || old_req) goto unlock_done; qp->s_tail_ack_queue = prev; break; @@ -2430,7 +2430,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | + vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) | be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; @@ -2441,11 +2441,11 @@ send_last: IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc_unlck; /* Perform atomic OP and save result. */ - maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + maddr = (atomic64_t *)qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); e->atomic_data = (opcode == OP(FETCH_ADD)) ? - (u64) atomic64_add_return(sdata, maddr) - sdata : - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + (u64)atomic64_add_return(sdata, maddr) - sdata : + (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, be64_to_cpu(ateth->compare_data), sdata); rvt_put_mr(qp->r_sge.sge.mr); diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 70f42c9..6f0005a 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -508,12 +508,12 @@ do_write: IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ - maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + maddr = (atomic64_t *)qp->r_sge.sge.vaddr; sdata = wqe->atomic_wr.compare_add; - *(u64 *) sqp->s_sge.sge.vaddr = + *(u64 *)sqp->s_sge.sge.vaddr = (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? - (u64) atomic64_add_return(sdata, maddr) - sdata : - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + (u64)atomic64_add_return(sdata, maddr) - sdata : + (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, sdata, wqe->atomic_wr.swap); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index cc21272..9379419 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -534,7 +534,7 @@ static void sdma_err_progress_check(unsigned long data) static void sdma_hw_clean_up_task(unsigned long opaque) { - struct sdma_engine *sde = (struct sdma_engine *) opaque; + struct sdma_engine *sde = (struct sdma_engine *)opaque; u64 statuscsr; while (1) { @@ -594,7 +594,7 @@ static void sdma_flush_descq(struct sdma_engine *sde) static void sdma_sw_clean_up_task(unsigned long opaque) { - struct sdma_engine *sde = (struct sdma_engine *) opaque; + struct sdma_engine *sde = (struct sdma_engine *)opaque; unsigned long flags; spin_lock_irqsave(&sde->tail_lock, flags); @@ -1345,8 +1345,8 @@ retry: use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) && (dd->flags & HFI1_HAS_SDMA_TIMEOUT); hwhead = use_dmahead ? - (u16) le64_to_cpu(*sde->head_dma) : - (u16) read_sde_csr(sde, SD(HEAD)); + (u16)le64_to_cpu(*sde->head_dma) : + (u16)read_sde_csr(sde, SD(HEAD)); if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) { u16 cnt; @@ -3021,7 +3021,7 @@ void sdma_freeze(struct hfi1_devdata *dd) * software clean will read engine CSRs, so must be completed before * the next step, which will clear the engine CSRs. 
*/ - (void) wait_event_interruptible(dd->sdma_unfreeze_wq, + (void)wait_event_interruptible(dd->sdma_unfreeze_wq, atomic_read(&dd->sdma_unfreeze_count) <= 0); /* no need to check results - done no matter what */ } diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c index 9eadec5..923ca55 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/staging/rdma/hfi1/trace.c @@ -158,7 +158,7 @@ const char *parse_everbs_hdrs( eh->atomic_eth.rkey, (unsigned long long)ib_u64_get( (__be32 *)&eh->atomic_eth.swap_data), - (unsigned long long) ib_u64_get( + (unsigned long long)ib_u64_get( (__be32 *)&eh->atomic_eth.compare_data)); break; /* deth */ diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 9fe18b0..03a10c8 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -1030,7 +1030,7 @@ free_tx: */ static inline int num_user_pages(const struct iovec *iov) { - const unsigned long addr = (unsigned long) iov->iov_base; + const unsigned long addr = (unsigned long)iov->iov_base; const unsigned long len = iov->iov_len; const unsigned long spage = addr & PAGE_MASK; const unsigned long epage = (addr + len - 1) & PAGE_MASK; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index c736015b..dc623c6 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -346,7 +346,7 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ static inline int cmp_msn(u32 a, u32 b) { - return (((int) a) - ((int) b)) << 8; + return (((int)a) - ((int)b)) << 8; } /* @@ -355,7 +355,7 @@ static inline int cmp_msn(u32 a, u32 b) */ static inline int cmp_psn(u32 a, u32 b) { - return (((int) a) - ((int) b)) << PSN_SHIFT; + return (((int)a) - ((int)b)) << PSN_SHIFT; } /* -- cgit v0.10.2 From d125a6c66b972e8c6768707c2814107df5963f5f Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:19:49 -0800 Subject: staging/rdma/hfi1: Fix comparison to NULL Convert pointer comparisons to NULL to !pointer to fix checkpatch check: CHECK: Comparison to NULL could be written "!pointer" Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index cf57865..ea0ffd4 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -12260,7 +12260,7 @@ u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir, int hfi1_init_ctxt(struct send_context *sc) { - if (sc != NULL) { + if (sc) { struct hfi1_devdata *dd = sc->dd; u64 reg; u8 set = (sc->type == SC_USER ? 
@@ -12371,7 +12371,7 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) struct hfi1_msix_entry *me = dd->msix_entries; for (i = 0; i < dd->num_msix_entries; i++, me++) { - if (me->arg == NULL) /* => no irq, no affinity */ + if (!me->arg) /* => no irq, no affinity */ continue; hfi1_put_irq_affinity(dd, &dd->msix_entries[i]); free_irq(me->msix.vector, me->arg); @@ -12534,7 +12534,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) continue; } /* no argument, no interrupt */ - if (arg == NULL) + if (!arg) continue; /* make sure the name is terminated */ me->name[sizeof(me->name) - 1] = 0; diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index f309c5f..fa3df1f 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -746,7 +746,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) ibd->hfi1_ibdev_dbg, ppd, &port_cntr_ops[i].ops, - port_cntr_ops[i].ops.write == NULL ? + !port_cntr_ops[i].ops.write ? S_IRUGO : S_IRUGO | S_IWUSR); } } diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 6082935..0c8bd91 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -371,7 +371,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (rhf_use_egr_bfr(packet->rhf)) ebuf = packet->ebuf; - if (ebuf == NULL) + if (!ebuf) goto drop; /* this should never happen */ if (lnh == HFI1_LRH_BTH) @@ -402,7 +402,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn); - if (qp == NULL) { + if (!qp) { rcu_read_unlock(); goto drop; } @@ -637,7 +637,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn); - if (qp == NULL) { + if (!qp) { rcu_read_unlock(); goto next; } diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 3071fbc..aabdc3d 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -386,7 +386,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd) cc_state = get_cc_state(ppd); - if (cc_state == NULL) + if (!cc_state) /* * This should _never_ happen - rcu_read_lock() is held, * and set_link_ipg() should not be called if cc_state @@ -438,7 +438,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) cc_state = get_cc_state(ppd); - if (cc_state == NULL) { + if (!cc_state) { rcu_read_unlock(); return HRTIMER_NORESTART; } diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index a56d7dc..44e7fbd 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -3297,7 +3297,7 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, cc_state = get_cc_state(ppd); - if (cc_state == NULL) { + if (!cc_state) { rcu_read_unlock(); return reply((struct ib_mad_hdr *)smp); } @@ -3439,7 +3439,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, cc_state = get_cc_state(ppd); - if (cc_state == NULL) { + if (!cc_state) { rcu_read_unlock(); return reply((struct ib_mad_hdr *)smp); } @@ -3505,14 +3505,14 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, } new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); - if (new_cc_state == NULL) + if (!new_cc_state) goto getit; spin_lock(&ppd->cc_state_lock); old_cc_state = get_cc_state(ppd); - if (old_cc_state == NULL) { + if 
(!old_cc_state) { spin_unlock(&ppd->cc_state_lock); kfree(new_cc_state); return reply((struct ib_mad_hdr *)smp); diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 3817731..7907e4c 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -2002,7 +2002,7 @@ int init_credit_return(struct hfi1_devdata *dd) bytes, &dd->cr_base[i].pa, GFP_KERNEL); - if (dd->cr_base[i].va == NULL) { + if (!dd->cr_base[i].va) { set_dev_node(&dd->pcidev->dev, dd->node); dd_dev_err(dd, "Unable to allocate credit return DMA range for NUMA %d\n", diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 24f2b65..99584f7 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -2025,7 +2025,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, cc_state = get_cc_state(ppd); - if (cc_state == NULL) + if (!cc_state) return; /* diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index 1f3a747..3c34f77 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -83,7 +83,7 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj, rcu_read_lock(); cc_state = get_cc_state(ppd); - if (cc_state == NULL) { + if (!cc_state) { rcu_read_unlock(); return -EINVAL; } @@ -130,7 +130,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, rcu_read_lock(); cc_state = get_cc_state(ppd); - if (cc_state == NULL) { + if (!cc_state) { rcu_read_unlock(); return -EINVAL; } diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 466055b..c412f1c 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -431,7 +431,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) if (lnh != HFI1_LRH_GRH) goto drop; mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid); - if (mcast == NULL) + if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; @@ -838,7 +838,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (cb) iowait_pio_inc(&priv->s_iowait); pbuf = sc_buffer_alloc(sc, plen, cb, qp); - if (unlikely(pbuf == NULL)) { + if (unlikely(!pbuf)) { if (cb) verbs_pio_complete(qp, 0); if (ppd->host_link_state != HLS_UP_ACTIVE) { -- cgit v0.10.2 From 458e86ab471b44a28a736cb8b0d364f3ec0d3e3e Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:19:58 -0800 Subject: staging/rdma/hfi1: Remove blank line after an open brace Remove blank line after an open brace to fix checkpatch check: CHECK: Blank lines aren't necessary after an open brace '{' Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index ea0ffd4..05e4f07 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -1538,7 +1538,6 @@ static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val, u64 __percpu *cntr, int vl, int mode, u64 data) { - u64 ret = 0; if (vl != CNTR_INVALID_VL) @@ -5931,7 +5930,6 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); if (reg & QSFP_HFI0_MODPRST_N) { - dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n", __func__); @@ -5995,7 +5993,6 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 
src_ctx, u64 reg) } if (reg & QSFP_HFI0_INT_N) { - dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n", __func__); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); @@ -9266,7 +9263,6 @@ void qsfp_event(struct work_struct *work) dc_start(dd); if (qd->cache_refresh_required) { - set_qsfp_int_n(ppd, 0); wait_for_qsfp_init(ppd); @@ -10122,7 +10118,6 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) "%s: logical state did not change to ACTIVE\n", __func__); } else { - /* tell all engines to go running */ sdma_all_running(dd); diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 0c8bd91..b5dfdb6 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -440,7 +440,6 @@ drop: static inline void init_packet(struct hfi1_ctxtdata *rcd, struct hfi1_packet *packet) { - packet->rsize = rcd->rcvhdrqentsize; /* words */ packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */ packet->rcd = rcd; @@ -755,7 +754,6 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet) static inline void finish_packet(struct hfi1_packet *packet) { - /* * Nothing we need to free for the packet. * @@ -769,7 +767,6 @@ static inline void finish_packet(struct hfi1_packet *packet) static inline void process_rcv_qp_work(struct hfi1_packet *packet) { - struct hfi1_ctxtdata *rcd; struct rvt_qp *qp, *nqp; diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 44e7fbd..a7e5f92 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -2183,7 +2183,6 @@ struct opa_port_error_info_msg { __be32 error_info_select_mask; __be32 reserved1; struct _port_ei { - u8 port_number; u8 reserved2[7]; @@ -3140,7 +3139,6 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), 8 * sizeof(vl_select_mask)) { - if (counter_select & CS_PORT_XMIT_DATA) write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index df90579..9024673 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -464,7 +464,6 @@ static int iowait_sleep( spin_lock_irqsave(&qp->s_lock, flags); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { - /* * If we couldn't queue the DMA request, save the info * and try again later rather than destroying the diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index a6d55a6..aa9c62b 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -359,7 +359,6 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) /* Is paging enabled? 
*/ if (!(cache[2] & 4)) { - /* Paging enabled, page 03 required */ if ((cache[195] & 0xC0) == 0xC0) { /* all */ @@ -520,7 +519,6 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) lenstr[1] = '\0'; if (ppd->qsfp_info.cache_valid) { - if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS])) sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]); diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 9379419..c0ff079 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -475,7 +475,6 @@ static void sdma_err_halt_wait(struct work_struct *work) static void sdma_err_progress_check_schedule(struct sdma_engine *sde) { if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) { - unsigned index; struct hfi1_devdata *dd = sde->dd; @@ -1238,7 +1237,6 @@ void sdma_exit(struct hfi1_devdata *dd) for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma; ++this_idx) { - sde = &dd->per_sdma[this_idx]; if (!list_empty(&sde->dmawait)) dd_dev_err(dd, "sde %u: dmawait list not empty!\n", -- cgit v0.10.2 From 5161fc3ef60260343c2ffc1b42c9a92ba954d846 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:20:06 -0800 Subject: staging/rdma/hfi1: Remove blank line before close brace Remove extra blank line before close brace to fix checkpatch check: CHECK: Blank lines aren't necessary before a close brace '}' Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 05e4f07..51256ba 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13710,7 +13710,6 @@ static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu, SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT | 64ull * cu << SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT); - } static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu) diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index b5dfdb6..fee5e39 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -762,7 +762,6 @@ static inline void finish_packet(struct hfi1_packet *packet) */ update_usrhead(packet->rcd, packet->rcd->head, packet->updegr, packet->etail, rcv_intr_dynamic, packet->numpkt); - } static inline void process_rcv_qp_work(struct hfi1_packet *packet) diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c index 9a9b331..685fb4d 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/staging/rdma/hfi1/intr.c @@ -178,7 +178,6 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) /* notify IB of the link change */ signal_ib_event(ppd, ev); } - } /* diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index a7e5f92..adfd0a9 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -3805,7 +3805,6 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, return reply((struct ib_mad_hdr *)smp); } next_smp += agg_size; - } return reply((struct ib_mad_hdr *)smp); @@ -3849,7 +3848,6 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, return reply((struct ib_mad_hdr *)smp); } next_smp += agg_size; - } return reply((struct ib_mad_hdr *)smp); diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 0368516..725e282 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -316,7 +316,6 @@ 
do_intx: nvec, ret); *msixcnt = 0; hfi1_enable_intx(dd->pcidev); - } /* return the PCIe link speed from the given link status */ diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index 3c34f77..fe232c1 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -568,7 +568,6 @@ static ssize_t show_serial(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); - } static ssize_t store_chip_reset(struct device *device, diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index e58ec15..afdf539 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -604,5 +604,4 @@ drop: op_err: hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; - } diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 5779f3a..c3f0697 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -772,7 +772,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey); if (mgmt_pkey_idx < 0) goto drop; - } if (unlikely(qkey != qp->qkey)) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, @@ -810,7 +809,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey); if (mgmt_pkey_idx < 0) goto drop; - } if (qp->ibqp.qp_num > 1 && diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 03a10c8..097d278 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -713,7 +713,6 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, msecs_to_jiffies( SDMA_IOWAIT_TIMEOUT)); } - } *count += idx; return 0; @@ -1194,7 +1193,6 @@ static int set_txreq_header(struct user_sdma_request *req, if (ret) return ret; goto done; - } hdr->bth[2] = cpu_to_be32( -- cgit v0.10.2 From d0d236ea34e6ce2d9106a8f61f92b6af3995d6ad Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:20:15 -0800 Subject: staging/rdma/hfi1: Fix logical continuations Move logical continuations to previous line to fix checkpatch check: CHECK: Logical continuations should be on the previous line Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 51256ba..b4c017a 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -6552,8 +6552,8 @@ void handle_sma_message(struct work_struct *work) * * Can activate the node. Discard otherwise. */ - if (ppd->host_link_state == HLS_UP_ARMED - && ppd->is_active_optimize_enabled) { + if (ppd->host_link_state == HLS_UP_ARMED && + ppd->is_active_optimize_enabled) { ppd->neighbor_normal = 1; ret = set_link_state(ppd, HLS_UP_ACTIVE); if (ret) @@ -7032,8 +7032,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, * handle_verify_cap(). The ASIC 8051 firmware does not correctly * set the max_rate field in handle_verify_cap until v0.19. 
*/ - if ((dd->icode == ICODE_RTL_SILICON) - && (dd->dc8051_ver < dc8051_ver(0, 19))) { + if ((dd->icode == ICODE_RTL_SILICON) && + (dd->dc8051_ver < dc8051_ver(0, 19))) { /* max_rate: 0 = 12.5G, 1 = 25G */ switch (max_rate) { case 0: @@ -7358,10 +7358,8 @@ retry: /* downgrade is disabled */ /* bounce if not at starting active width */ - if ((ppd->link_width_active != - ppd->link_width_downgrade_tx_active) - || (ppd->link_width_active != - ppd->link_width_downgrade_rx_active)) { + if ((ppd->link_width_active != ppd->link_width_downgrade_tx_active) || + (ppd->link_width_active != ppd->link_width_downgrade_rx_active)) { dd_dev_err(ppd->dd, "Link downgrade is disabled and link has downgraded, downing link\n"); dd_dev_err(ppd->dd, @@ -7371,8 +7369,8 @@ retry: ppd->link_width_downgrade_rx_active); do_bounce = 1; } - } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 - || (lwde & ppd->link_width_downgrade_rx_active) == 0) { + } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 || + (lwde & ppd->link_width_downgrade_rx_active) == 0) { /* Tx or Rx is outside the enabled policy */ dd_dev_err(ppd->dd, "Link is outside of downgrade allowed, downing link\n"); @@ -7567,9 +7565,9 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) if (queue_link_down) { /* if the link is already going down or disabled, do not * queue another */ - if ((ppd->host_link_state - & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) - || ppd->link_enabled == 0) { + if ((ppd->host_link_state & + (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || + ppd->link_enabled == 0) { dd_dev_info(dd, "%s: not queuing link down\n", __func__); } else { @@ -8888,10 +8886,9 @@ static int init_loopback(struct hfi1_devdata *dd) * * Accept all valid loopback values. */ - if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR) - && (loopback == LOOPBACK_SERDES - || loopback == LOOPBACK_LCB - || loopback == LOOPBACK_CABLE)) { + if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR) && + (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB || + loopback == LOOPBACK_CABLE)) { loopback = LOOPBACK_LCB; quick_linkup = 1; return 0; @@ -10020,8 +10017,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) state = dd->link_default; /* interpret poll -> poll as a link bounce */ - poll_bounce = ppd->host_link_state == HLS_DN_POLL - && state == HLS_DN_POLL; + poll_bounce = ppd->host_link_state == HLS_DN_POLL && + state == HLS_DN_POLL; dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__, link_state_name(ppd->host_link_state), @@ -10048,8 +10045,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) switch (state) { case HLS_UP_INIT: - if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup - || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) { + if (ppd->host_link_state == HLS_DN_POLL && + (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) { /* * Quick link up jumps from polling to here. 
* @@ -10779,8 +10776,8 @@ int set_buffer_control(struct hfi1_pportdata *ppd, != cur_bc.vl[i].shared; if (this_shared_changing) any_shared_limit_changing = 1; - if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated - || this_shared_changing) { + if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated || + this_shared_changing) { changing[i] = 1; changing_mask |= stat_mask; change_count++; @@ -11227,8 +11224,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL); /* if the context already enabled, don't do the extra steps */ - if ((op & HFI1_RCVCTRL_CTXT_ENB) - && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) { + if ((op & HFI1_RCVCTRL_CTXT_ENB) && + !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) { /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, rcd->rcvhdrq_phys); @@ -11344,8 +11341,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl); /* work around sticky RcvCtxtStatus.BlockedRHQFull */ - if (did_enable - && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) { + if (did_enable && + (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) { reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS); if (reg != 0) { dd_dev_info(dd, "ctxt %d status %lld (blocked)\n", @@ -13989,8 +13986,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* link width active is 0 when link is down */ /* link width downgrade active is 0 when link is down */ - if (num_vls < HFI1_MIN_VLS_SUPPORTED - || num_vls > HFI1_MAX_VLS_SUPPORTED) { + if (num_vls < HFI1_MIN_VLS_SUPPORTED || + num_vls > HFI1_MAX_VLS_SUPPORTED) { hfi1_early_err(&pdev->dev, "Invalid num_vls %u, using %u VLs\n", num_vls, HFI1_MAX_VLS_SUPPORTED); diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index fee5e39..3ef297e 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -1123,9 +1123,9 @@ int set_mtu(struct hfi1_pportdata *ppd) ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd); mutex_lock(&ppd->hls_lock); - if (ppd->host_link_state == HLS_UP_INIT - || ppd->host_link_state == HLS_UP_ARMED - || ppd->host_link_state == HLS_UP_ACTIVE) + if (ppd->host_link_state == HLS_UP_INIT || + ppd->host_link_state == HLS_UP_ARMED || + ppd->host_link_state == HLS_UP_ACTIVE) is_up = 1; drain = !is_ax(dd) && is_up; diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index b52cb78..5077ee0 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1626,12 +1626,12 @@ static ssize_t ui_read(struct file *filp, char __user *buf, size_t count, * them. These registers are defined as having a read value * of 0. 
*/ - else if (csr_off == ASIC_GPIO_CLEAR - || csr_off == ASIC_GPIO_FORCE - || csr_off == ASIC_QSFP1_CLEAR - || csr_off == ASIC_QSFP1_FORCE - || csr_off == ASIC_QSFP2_CLEAR - || csr_off == ASIC_QSFP2_FORCE) + else if (csr_off == ASIC_GPIO_CLEAR || + csr_off == ASIC_GPIO_FORCE || + csr_off == ASIC_QSFP1_CLEAR || + csr_off == ASIC_QSFP1_FORCE || + csr_off == ASIC_QSFP2_CLEAR || + csr_off == ASIC_QSFP2_FORCE) data = 0; else if (csr_off >= barlen) { /* diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 31550a3..1af5e34 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -391,19 +391,13 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what, static int verify_css_header(struct hfi1_devdata *dd, struct css_header *css) { /* verify CSS header fields (most sizes are in DW, so add /4) */ - if (invalid_header(dd, "module_type", css->module_type, CSS_MODULE_TYPE) - || invalid_header(dd, "header_len", css->header_len, - (sizeof(struct firmware_file) / 4)) - || invalid_header(dd, "header_version", - css->header_version, CSS_HEADER_VERSION) - || invalid_header(dd, "module_vendor", - css->module_vendor, CSS_MODULE_VENDOR) - || invalid_header(dd, "key_size", - css->key_size, KEY_SIZE / 4) - || invalid_header(dd, "modulus_size", - css->modulus_size, KEY_SIZE / 4) - || invalid_header(dd, "exponent_size", - css->exponent_size, EXPONENT_SIZE / 4)) { + if (invalid_header(dd, "module_type", css->module_type, CSS_MODULE_TYPE) || + invalid_header(dd, "header_len", css->header_len, (sizeof(struct firmware_file) / 4)) || + invalid_header(dd, "header_version", css->header_version, CSS_HEADER_VERSION) || + invalid_header(dd, "module_vendor", css->module_vendor, CSS_MODULE_VENDOR) || + invalid_header(dd, "key_size", css->key_size, KEY_SIZE / 4) || + invalid_header(dd, "modulus_size", css->modulus_size, KEY_SIZE / 4) || + invalid_header(dd, "exponent_size", css->exponent_size, EXPONENT_SIZE / 4)) { return -EINVAL; } return 0; diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c index 685fb4d..03cebae 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/staging/rdma/hfi1/intr.c @@ -131,8 +131,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) * NOTE: This uses this device's vAU, vCU, and vl15_init for * the remote values. Both sides must be using the values. 
*/ - if (quick_linkup - || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { + if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { set_up_vl15(dd, dd->vau, dd->vl15_init); assign_remote_cm_au_table(dd, dd->vcu); ppd->neighbor_guid = diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index adfd0a9..ae594f4 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -1170,8 +1170,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ppd->port_error_action = be32_to_cpu(pi->port_error_action); lwe = be16_to_cpu(pi->link_width.enabled); if (lwe) { - if (lwe == OPA_LINK_WIDTH_RESET - || lwe == OPA_LINK_WIDTH_RESET_OLD) + if (lwe == OPA_LINK_WIDTH_RESET || + lwe == OPA_LINK_WIDTH_RESET_OLD) set_link_width_enabled(ppd, ppd->link_width_supported); else if ((lwe & ~ppd->link_width_supported) == 0) set_link_width_enabled(ppd, lwe); @@ -1180,8 +1180,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, } lwe = be16_to_cpu(pi->link_width_downgrade.enabled); /* LWD.E is always applied - 0 means "disabled" */ - if (lwe == OPA_LINK_WIDTH_RESET - || lwe == OPA_LINK_WIDTH_RESET_OLD) { + if (lwe == OPA_LINK_WIDTH_RESET || + lwe == OPA_LINK_WIDTH_RESET_OLD) { set_link_width_downgrade_enabled(ppd, ppd->link_width_downgrade_supported); } else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) { @@ -2335,8 +2335,8 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - if (nports != 1 || (port_num && port_num != port) - || num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) { + if (nports != 1 || (port_num && port_num != port) || + num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index c0ff079..c8c0aace 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -672,8 +672,8 @@ static void sdma_set_state(struct sdma_engine *sde, ss->previous_op = ss->current_op; ss->current_state = next_state; - if (ss->previous_state != sdma_state_s99_running - && next_state == sdma_state_s99_running) + if (ss->previous_state != sdma_state_s99_running && + next_state == sdma_state_s99_running) sdma_flush(sde); if (action[next_state].op_enable) -- cgit v0.10.2 From f4d507cdccd708a873dc4d6268a09475779af82d Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:20:25 -0800 Subject: staging/rdma/hfi1: Add blank link after declarations Add blank line after declarations to fix checkpatch check: CHECK: Please use a blank line after function/struct/union/enum declarations Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index b4c017a..bdc5610 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -6995,6 +6995,7 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width) static const u8 bit_counts[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; + static inline u8 nibble_to_count(u8 nibble) { return bit_counts[nibble & 0xf]; @@ -10410,6 +10411,7 @@ static int vl_arb_match_cache(struct vl_arb_cache *cache, { return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl)); } + /* end functions related to vl arbitration table caching */ 
static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target, diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index fa3df1f..e02c527 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -71,6 +71,7 @@ static const struct seq_operations _##name##_seq_ops = { \ .stop = _##name##_seq_stop, \ .show = _##name##_seq_show \ } + #define DEBUGFS_SEQ_FILE_OPEN(name) \ static int _##name##_open(struct inode *inode, struct file *s) \ { \ diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 1af5e34..16c9dc7 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -113,6 +113,7 @@ struct css_header { u32 exponent_size; /* in DWORDs */ u32 reserved[22]; }; + /* expected field values */ #define CSS_MODULE_TYPE 0x00000006 #define CSS_HEADER_LEN 0x000000a1 @@ -172,6 +173,7 @@ enum fw_state { FW_FINAL, FW_ERR }; + static enum fw_state fw_state = FW_EMPTY; static int fw_err; static struct firmware_details fw_8051; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index de82f8e..805535e 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1495,6 +1495,7 @@ static inline int valid_ib_mtu(unsigned int mtu) mtu == 1024 || mtu == 2048 || mtu == 4096; } + static inline int valid_opa_max_mtu(unsigned int mtu) { return mtu >= 2048 && diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 725e282..b169166 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -529,6 +529,7 @@ static void tune_pcie_caps(struct hfi1_devdata *dd) pcie_set_readrq(dd->pcidev, ep_mrrs); } } + /* End of PCIe capability tuning */ /* diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h index 8d0cf1b..09a5eeb 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/staging/rdma/hfi1/pio.h @@ -105,6 +105,7 @@ struct send_context { struct hfi1_devdata *dd; /* device */ void __iomem *base_addr; /* start of PIO memory */ union pio_shadow_ring *sr; /* shadow ring */ + volatile __le64 *hw_free; /* HW free counter */ struct work_struct halt_work; /* halted context work queue entry */ unsigned long flags; /* flags */ -- cgit v0.10.2 From 58721b8f8c71a643edf9d51be159c5db39d843c6 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:20:33 -0800 Subject: staging/rdma/hfi1: Remove unnecessary parentheses Remove unnecessary parentheses around addressof single $Lvals to fix checkpatch check: CHECK: Unnecessary parentheses around $var Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index ae594f4..36bd6fae 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -2416,7 +2416,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL); rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff; - vlinfo = &(rsp->vls[0]); + vlinfo = &rsp->vls[0]; vfi = 0; /* The vl_select_mask has been checked above, and we know * that it contains only entries which represent valid VLs. 
@@ -2609,7 +2609,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - rsp = (struct _port_dctrs *)&(req->port[0]); + rsp = (struct _port_dctrs *)&req->port[0]; memset(rsp, 0, sizeof(*rsp)); rsp->port_number = port; @@ -2632,7 +2632,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, cpu_to_be64(get_error_counter_summary(ibdev, port, res_lli, res_ler)); - vlinfo = &(rsp->vls[0]); + vlinfo = &rsp->vls[0]; vfi = 0; /* The vl_select_mask has been checked above, and we know * that it contains only entries which represent valid VLs. @@ -2816,7 +2816,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - rsp = (struct _port_ectrs *)&(req->port[0]); + rsp = (struct _port_ectrs *)&req->port[0]; ibp = to_iport(ibdev, port_num); ppd = ppd_from_ibp(ibp); @@ -2836,7 +2836,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff; - vlinfo = (struct _vls_ectrs *)&(rsp->vls[0]); + vlinfo = (struct _vls_ectrs *)&rsp->vls[0]; vfi = 0; vl_select_mask = be32_to_cpu(req->vl_select_mask); for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), @@ -2952,7 +2952,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, u64 reg; req = (struct opa_port_error_info_msg *)pmp->data; - rsp = (struct _port_ei *)&(req->port[0]); + rsp = (struct _port_ei *)&req->port[0]; num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod)); num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3])); @@ -3192,7 +3192,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, u32 error_info_select; req = (struct opa_port_error_info_msg *)pmp->data; - rsp = (struct _port_ei *)&(req->port[0]); + rsp = (struct _port_ei *)&req->port[0]; num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod)); num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3])); -- cgit v0.10.2 From 3f34d9588ff3b8adc4b5828327554fce98a94204 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:20:42 -0800 Subject: staging/rdma/hfi1: Use BIT_ULL macro Use BIT_ULL macro to fix checkpatch check: CHECK: Prefer using the BIT_ULL macro Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 6c581e0..2b30aaa 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -93,15 +93,15 @@ #define TXE_PIO_SEND (TXE + TXE_PIO_SEND_OFFSET) /* PBC flags */ -#define PBC_INTR (1ull << 31) +#define PBC_INTR BIT_ULL(31) #define PBC_DC_INFO_SHIFT (30) -#define PBC_DC_INFO (1ull << PBC_DC_INFO_SHIFT) -#define PBC_TEST_EBP (1ull << 29) -#define PBC_PACKET_BYPASS (1ull << 28) -#define PBC_CREDIT_RETURN (1ull << 25) -#define PBC_INSERT_BYPASS_ICRC (1ull << 24) -#define PBC_TEST_BAD_ICRC (1ull << 23) -#define PBC_FECN (1ull << 22) +#define PBC_DC_INFO BIT_ULL(PBC_DC_INFO_SHIFT) +#define PBC_TEST_EBP BIT_ULL(29) +#define PBC_PACKET_BYPASS BIT_ULL(28) +#define PBC_CREDIT_RETURN BIT_ULL(25) +#define PBC_INSERT_BYPASS_ICRC BIT_ULL(24) +#define PBC_TEST_BAD_ICRC BIT_ULL(23) +#define PBC_FECN BIT_ULL(22) /* PbcInsertHcrc field settings */ #define PBC_IHCRC_LKDETH 0x0 /* insert @ local KDETH offset */ diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c index 9a0ddd7..d7250af 100644 --- a/drivers/staging/rdma/hfi1/eprom.c +++ 
b/drivers/staging/rdma/hfi1/eprom.c @@ -102,7 +102,7 @@ #define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US)) /* GPIO pins */ -#define EPROM_WP_N (1ull << 14) /* EPROM write line */ +#define EPROM_WP_N BIT_ULL(14) /* EPROM write line */ /* * Use the EP mutex to guard against other callers from within the driver. diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index cc01e81..0ee22c4 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -107,8 +107,8 @@ /* * Bits defined in the send DMA descriptor. */ -#define SDMA_DESC0_FIRST_DESC_FLAG (1ULL << 63) -#define SDMA_DESC0_LAST_DESC_FLAG (1ULL << 62) +#define SDMA_DESC0_FIRST_DESC_FLAG BIT_ULL(63) +#define SDMA_DESC0_LAST_DESC_FLAG BIT_ULL(62) #define SDMA_DESC0_BYTE_COUNT_SHIFT 48 #define SDMA_DESC0_BYTE_COUNT_WIDTH 14 #define SDMA_DESC0_BYTE_COUNT_MASK \ @@ -152,8 +152,8 @@ ((1ULL << SDMA_DESC1_GENERATION_WIDTH) - 1) #define SDMA_DESC1_GENERATION_SMASK \ (SDMA_DESC1_GENERATION_MASK << SDMA_DESC1_GENERATION_SHIFT) -#define SDMA_DESC1_INT_REQ_FLAG (1ULL << 1) -#define SDMA_DESC1_HEAD_TO_HOST_FLAG (1ULL << 0) +#define SDMA_DESC1_INT_REQ_FLAG BIT_ULL(1) +#define SDMA_DESC1_HEAD_TO_HOST_FLAG BIT_ULL(0) enum sdma_states { sdma_state_s00_hw_down, -- cgit v0.10.2 From f3ff8189419e34b61c0e1040174dbd6701bf3428 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:20:50 -0800 Subject: staging/rdma/hfi1: Split multiple assignments Split multiple assignments into individual assignments to fix checkpatch check: CHECK: multiple assignments should be avoided Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index bdc5610..233958d 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -12459,8 +12459,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd) /* calculate the ranges we are going to use */ first_general = 0; - first_sdma = last_general = first_general + 1; - first_rx = last_sdma = first_sdma + dd->num_sdma; + last_general = first_general + 1; + first_sdma = last_general; + last_sdma = first_sdma + dd->num_sdma; + first_rx = last_sdma; last_rx = first_rx + dd->n_krcv_queues; /* diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 36bd6fae..118a09e 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -896,8 +896,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, u32 logical_old = driver_logical_state(ppd); int ret, logical_allowed, physical_allowed; - logical_allowed = ret = - logical_transition_allowed(logical_old, logical_new); + ret = logical_transition_allowed(logical_old, logical_new); + logical_allowed = ret; if (ret == HFI_TRANSITION_DISALLOWED || ret == HFI_TRANSITION_UNDEFINED) { @@ -907,8 +907,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, return ret; } - physical_allowed = ret = - physical_transition_allowed(physical_old, physical_new); + ret = physical_transition_allowed(physical_old, physical_new); + physical_allowed = ret; if (ret == HFI_TRANSITION_DISALLOWED || ret == HFI_TRANSITION_UNDEFINED) { diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 0ee22c4..0c5f501 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -565,7 +565,8 @@ static inline int 
sdma_txinit_ahg( tx->complete = cb; tx->coalesce_buf = NULL; tx->wait = NULL; - tx->tlen = tx->packet_len = tlen; + tx->packet_len = tlen; + tx->tlen = tx->packet_len; tx->descs[0].qw[0] = SDMA_DESC0_FIRST_DESC_FLAG; tx->descs[0].qw[1] = 0; if (flags & SDMA_TXREQ_F_AHG_COPY) -- cgit v0.10.2 From 16733b8822017b84d2abdb8ae2b6c7d554a4e0d0 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:20:58 -0800 Subject: staging/rdma/hfi1: Fix misspellings Fix misspelled word based on checkpatch check: CHECK: 'ffoo' may be misspelled - perhaps 'foo'? Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 233958d..53e3273 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -8080,7 +8080,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) * Receive packet IRQ handler. This routine expects to be on its own IRQ. * This routine will try to handle packets immediately (latency), but if * it finds too many, it will invoke the thread handler (bandwitdh). The - * chip receive interupt is *not* cleared down until this or the thread (if + * chip receive interrupt is *not* cleared down until this or the thread (if * invoked) is finished. The intent is to avoid extra interrupts while we * are processing packets anyway. */ diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index aa9c62b..bdb1504 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -216,7 +216,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, while (count < len) { /* - * Set the qsfp page based on a zero-based addresss + * Set the qsfp page based on a zero-based address * and a page size of QSFP_PAGESIZE bytes. */ page = (u8)(addr / QSFP_PAGESIZE); diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index c8c0aace..cd818de 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -2739,7 +2739,7 @@ enomem: * This function calls _extend_sdma_tx_descs to extend or allocate * coalesce buffer. If there is a allocated coalesce buffer, it will * copy the input packet data into the coalesce buffer. It also adds - * coalesce buffer descriptor once whe whole packet is received. + * coalesce buffer descriptor once when whole packet is received. 
* * Return: * <0 - error -- cgit v0.10.2 From 3db68f4672be95d6f8b0482f1e14c4257b1ee45e Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:21:07 -0800 Subject: staging/rdma/hfi1: Remove CamelCase Remove CamelCase to fix checkpatch check: CHECK: Avoid CamelCase: Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 2b30aaa..32e91e7 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -212,7 +212,7 @@ #define PLS_CONFIGPHY_DEBOUCE 0x40 #define PLS_CONFIGPHY_ESTCOMM 0x41 #define PLS_CONFIGPHY_ESTCOMM_TXRX_HUNT 0x42 -#define PLS_CONFIGPHY_ESTcOMM_LOCAL_COMPLETE 0x43 +#define PLS_CONFIGPHY_ESTCOMM_LOCAL_COMPLETE 0x43 #define PLS_CONFIGPHY_OPTEQ 0x44 #define PLS_CONFIGPHY_OPTEQ_OPTIMIZING 0x44 #define PLS_CONFIGPHY_OPTEQ_LOCAL_COMPLETE 0x45 -- cgit v0.10.2 From fcdd76df519e7be5e1094a4bf995374398c44efc Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:21:16 -0800 Subject: staging/rdma/hfi1: Use pointer instead of struct name Use sizeof(*p) instead of sizeof(struct foo) to fix checkpatch check: CHECK: Prefer alloc(sizeof(*p)...) over alloc(sizeof(struct foo)...) Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 7907e4c..b0a2a45 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -700,7 +700,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, if (dd->flags & HFI1_FROZEN) return NULL; - sc = kzalloc_node(sizeof(struct send_context), GFP_KERNEL, numa); + sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa); if (!sc) return NULL; -- cgit v0.10.2 From d17c0cada704d6d5a291425192fb5148fb99cca1 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:21:26 -0800 Subject: staging/rdma/hfi1: Remove void function return statement Remove return statement at the end of a void function to fix checkpatch warning: WARNING: void function return statements are not generally useful Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index afdf539..8915401 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -603,5 +603,4 @@ drop: op_err: hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - return; } -- cgit v0.10.2 From 6a14c5ea380c1260772c70b9fd0a1492131f6116 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:21:34 -0800 Subject: staging/rdma/hfi1: Add comment for spinlock_t definition Add comments describing the spinlock for spinlock_t definitions to fix checkpatch check: CHECK: spinlock_t definition without comment Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 805535e..774d8ff 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -313,6 +313,7 @@ struct hfi1_ctxtdata { */ struct task_struct *progress; struct list_head sdma_queues; + /* protect sdma queues */ spinlock_t sdma_qlock; /* Is ASPM interrupt supported for this context */ @@ -380,6 
+381,7 @@ struct hfi1_snoop_data { int mode_flag; struct cdev cdev; struct device *class_dev; + /* protect snoop data */ spinlock_t snoop_lock; struct list_head queue; wait_queue_head_t waitq; @@ -561,6 +563,7 @@ enum { }; struct vl_arb_cache { + /* protect vl arb cache */ spinlock_t lock; struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE]; }; diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h index 9f6e2f3..c391750 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/staging/rdma/hfi1/qsfp.h @@ -215,6 +215,7 @@ struct qsfp_data { struct hfi1_pportdata *ppd; struct work_struct qsfp_work; u8 cache[QSFP_MAX_NUM_PAGES * 128]; + /* protect qsfp data */ spinlock_t qsfp_lock; u8 check_interrupt_flags; u8 reset_needed; diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 0c5f501..5aec18b 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -412,6 +412,7 @@ struct sdma_engine { u32 progress_check_head; /* private: */ struct work_struct flush_worker; + /* protect flush list */ spinlock_t flushlist_lock; /* private: */ struct list_head flushlist; -- cgit v0.10.2 From 4d114fdd90ab4152a1477593edd9375be71d282d Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:21:43 -0800 Subject: staging/rdma/hfi1: Fix block comments Fix block comments with proper formatting to fix checkpatch warnings: WARNING: Block comments use * on subsequent lines WARNING: Block comments use a trailing */ on a separate line Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 53e3273..8e84060 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -6392,14 +6392,18 @@ static void dc_shutdown(struct hfi1_devdata *dd) spin_unlock_irqrestore(&dd->dc8051_lock, flags); /* Shutdown the LCB */ lcb_shutdown(dd, 1); - /* Going to OFFLINE would have causes the 8051 to put the + /* + * Going to OFFLINE would have causes the 8051 to put the * SerDes into reset already. Just need to shut down the 8051, - * itself. */ + * itself. + */ write_csr(dd, DC_DC8051_CFG_RST, 0x1); } -/* Calling this after the DC has been brought out of reset should not - * do any damage. */ +/* + * Calling this after the DC has been brought out of reset should not + * do any damage. + */ static void dc_start(struct hfi1_devdata *dd) { unsigned long flags; @@ -6525,8 +6529,10 @@ void handle_sma_message(struct work_struct *work) u64 msg; int ret; - /* msg is bytes 1-4 of the 40-bit idle message - the command code - is stripped off */ + /* + * msg is bytes 1-4 of the 40-bit idle message - the command code + * is stripped off + */ ret = read_idle_sma(dd, &msg); if (ret) return; @@ -6815,8 +6821,10 @@ void handle_link_up(struct work_struct *work) } } -/* Several pieces of LNI information were cached for SMA in ppd. - * Reset these on link down */ +/* + * Several pieces of LNI information were cached for SMA in ppd. + * Reset these on link down + */ static void reset_neighbor_info(struct hfi1_pportdata *ppd) { ppd->neighbor_guid = 0; @@ -6862,8 +6870,10 @@ void handle_link_down(struct work_struct *work) /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); - /* If there is no cable attached, turn the DC off. Otherwise, - * start the link bring up. */ + /* + * If there is no cable attached, turn the DC off. 
Otherwise, + * start the link bring up. + */ if (!qsfp_mod_present(ppd)) { dc_shutdown(ppd->dd); } else { @@ -7564,8 +7574,10 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) } if (queue_link_down) { - /* if the link is already going down or disabled, do not - * queue another */ + /* + * if the link is already going down or disabled, do not + * queue another + */ if ((ppd->host_link_state & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || ppd->link_enabled == 0) { @@ -7712,8 +7724,10 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) /* set status bit */ dd->err_info_rcvport.status_and_code |= OPA_EI_STATUS_SMASK; - /* save first 2 flits in the packet that caused - * the error */ + /* + * save first 2 flits in the packet that caused + * the error + */ dd->err_info_rcvport.packet_flit1 = hdr0; dd->err_info_rcvport.packet_flit2 = hdr1; } @@ -7913,8 +7927,10 @@ static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source) } static const struct is_table is_table[] = { -/* start end - name func interrupt func */ +/* + * start end + * name func interrupt func + */ { IS_GENERAL_ERR_START, IS_GENERAL_ERR_END, is_misc_err_name, is_misc_err_int }, { IS_SDMAENG_ERR_START, IS_SDMAENG_ERR_END, @@ -10763,8 +10779,10 @@ int set_buffer_control(struct hfi1_pportdata *ppd, */ memset(changing, 0, sizeof(changing)); memset(lowering_dedicated, 0, sizeof(lowering_dedicated)); - /* NOTE: Assumes that the individual VL bits are adjacent and in - increasing order */ + /* + * NOTE: Assumes that the individual VL bits are adjacent and in + * increasing order + */ stat_mask = SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK; changing_mask = 0; @@ -11129,8 +11147,10 @@ static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts) } rcd->rcvavail_timeout = timeout; - /* timeout cannot be larger than rcv_intr_timeout_csr which has already - been verified to be in range */ + /* + * timeout cannot be larger than rcv_intr_timeout_csr which has already + * been verified to be in range + */ write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT, (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT); } @@ -11323,8 +11343,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) if (op & HFI1_RCVCTRL_TIDFLOW_DIS) rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK; if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) { - /* In one-packet-per-eager mode, the size comes from - the RcvArray entry. */ + /* + * In one-packet-per-eager mode, the size comes from + * the RcvArray entry. 
+ */ rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK; rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK; } @@ -12524,7 +12546,8 @@ static int request_msix_irqs(struct hfi1_devdata *dd) me->type = IRQ_RCVCTXT; } else { /* not in our expected range - complain, then - ignore it */ + * ignore it + */ dd_dev_err(dd, "Unexpected extra MSI-X interrupt %d\n", i); continue; @@ -12830,8 +12853,10 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* PIO Send buffers */ /* SDMA Send buffers */ - /* These are not normally read, and (presently) have no method - to be read, so are not pre-initialized */ + /* + * These are not normally read, and (presently) have no method + * to be read, so are not pre-initialized + */ /* RcvHdrAddr */ /* RcvHdrTailAddr */ @@ -13026,8 +13051,10 @@ static void reset_misc_csrs(struct hfi1_devdata *dd) write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0); write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0); } - /* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can - only be written 128-byte chunks */ + /* + * MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can + * only be written 128-byte chunks + */ /* init RSA engine to clear lingering errors */ write_csr(dd, MISC_CFG_RSA_CMD, 1); write_csr(dd, MISC_CFG_RSA_MU, 0); @@ -14045,8 +14072,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT) & CCE_REVISION_CHIP_REV_MINOR_MASK; - /* obtain the hardware ID - NOT related to unit, which is a - software enumeration */ + /* + * obtain the hardware ID - NOT related to unit, which is a + * software enumeration + */ reg = read_csr(dd, CCE_REVISION2); dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT) & CCE_REVISION2_HFI_ID_MASK; diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 32e91e7..0b7055b 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -79,8 +79,10 @@ #define PIO_CMASK 0x7ff /* counter mask for free and fill counters */ #define MAX_EAGER_ENTRIES 2048 /* max receive eager entries */ #define MAX_TID_PAIR_ENTRIES 1024 /* max receive expected pairs */ -/* Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed - at 64 bytes for all generation one devices */ +/* + * Virtual? 
Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed + * at 64 bytes for all generation one devices + */ #define CM_VAU 3 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */ #define CM_GLOBAL_CREDITS 0x940 @@ -518,8 +520,10 @@ enum { #define LCB_CRC_48B 0x2 /* 48b CRC */ #define LCB_CRC_12B_16B_PER_LANE 0x3 /* 12b-16b per lane CRC */ -/* the following enum is (almost) a copy/paste of the definition - * in the OPA spec, section 20.2.2.6.8 (PortInfo) */ +/* + * the following enum is (almost) a copy/paste of the definition + * in the OPA spec, section 20.2.2.6.8 (PortInfo) + */ enum { PORT_LTP_CRC_MODE_NONE = 0, PORT_LTP_CRC_MODE_14 = 1, /* 14-bit LTP CRC mode (optional) */ diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 5077ee0..c4b9dd4 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -388,8 +388,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, break; } if (dd->flags & HFI1_FORCED_FREEZE) { - /* Don't allow context reset if we are into - * forced freeze */ + /* + * Don't allow context reset if we are into + * forced freeze + */ ret = -ENODEV; break; } diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 16c9dc7..3a7163d 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -1294,8 +1294,10 @@ static int load_pcie_serdes_firmware(struct hfi1_devdata *dd, /* step 3: enable XDMEM access */ sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000d40); /* step 4: load firmware into SBus Master XDMEM */ - /* NOTE: the dmem address, write_en, and wdata are all pre-packed, - we only need to pick up the bytes and write them */ + /* + * NOTE: the dmem address, write_en, and wdata are all pre-packed, + * we only need to pick up the bytes and write them + */ for (i = 0; i < fdet->firmware_len; i += 4) { sbus_request(dd, ra, 0x04, WRITE_SBUS_RECEIVER, *(u32 *)&fdet->firmware_ptr[i]); @@ -1305,8 +1307,10 @@ static int load_pcie_serdes_firmware(struct hfi1_devdata *dd, /* step 6: allow SBus Spico to run */ sbus_request(dd, ra, 0x05, WRITE_SBUS_RECEIVER, 0x00000000); - /* steps 7-11: run RSA, if it succeeds, firmware is available to - be swapped */ + /* + * steps 7-11: run RSA, if it succeeds, firmware is available to + * be swapped + */ return run_rsa(dd, "PCIe serdes", fdet->signature); } @@ -1744,8 +1748,10 @@ int get_platform_config_field(struct hfi1_devdata *dd, src_ptr = (u32 *)((u8 *)src_ptr + seek); - /* We expect the field to be byte aligned and whole byte - * lengths if we are here */ + /* + * We expect the field to be byte aligned and whole byte + * lengths if we are here + */ memcpy(data, src_ptr, wlen); return 0; } diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 774d8ff..4db5ad9 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -718,8 +718,10 @@ struct hfi1_pportdata { /* CA's max number of 64 entry units in the congestion control table */ u8 cc_max_table_entries; - /* begin congestion log related entries - * cc_log_lock protects all congestion log related data */ + /* + * begin congestion log related entries + * cc_log_lock protects all congestion log related data + */ spinlock_t cc_log_lock ____cacheline_aligned_in_smp; u8 threshold_cong_event_map[OPA_MAX_SLS / 8]; u16 threshold_event_counter; diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 
aabdc3d..f794604 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -790,8 +790,10 @@ done: for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; - /* start the serdes - must be after interrupts are - enabled so we are notified when the link goes up */ + /* + * start the serdes - must be after interrupts are + * enabled so we are notified when the link goes up + */ lastfail = bringup_serdes(ppd); if (lastfail) dd_dev_info(dd, @@ -1188,8 +1190,10 @@ static int __init hfi1_mod_init(void) user_credit_return_threshold = 100; compute_krcvqs(); - /* sanitize receive interrupt count, time must wait until after - the hardware type is known */ + /* + * sanitize receive interrupt count, time must wait until after + * the hardware type is known + */ if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK) rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK; /* reject invalid combinations */ diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 118a09e..13cf66f 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -696,8 +696,10 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */ read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp); - /* this counter is 16 bits wide, but the replay_depth.wire - * variable is only 8 bits */ + /* + * this counter is 16 bits wide, but the replay_depth.wire + * variable is only 8 bits + */ if (tmp > 0xff) tmp = 0xff; pi->replay_depth.wire = tmp; @@ -1621,8 +1623,10 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, /* IB numbers ports from 1, hw from 0 */ ppd = dd->pport + (port - 1); lstate = driver_lstate(ppd); - /* it's known that async_update is 0 by this point, but include - * the explicit check for clarity */ + /* + * it's known that async_update is 0 by this point, but include + * the explicit check for clarity + */ if (!async_update && (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) { smp->status |= IB_SMP_INVALID_FIELD; @@ -1797,8 +1801,10 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1) #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK) - /* check that addr is within spec, and - * addr and (addr + len - 1) are on the same "page" */ + /* + * check that addr is within spec, and + * addr and (addr + len - 1) are on the same "page" + */ if (addr >= 4096 || (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) { smp->status |= IB_SMP_INVALID_FIELD; @@ -1935,8 +1941,10 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, case OPA_VLARB_HIGH_ELEMENTS: (void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p); break; - /* neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX - * can be changed from the default values */ + /* + * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX + * can be changed from the default values + */ case OPA_VLARB_PREEMPT_ELEMENTS: /* FALLTHROUGH */ case OPA_VLARB_PREEMPT_MATRIX: @@ -2148,8 +2156,10 @@ struct opa_port_data_counters_msg { }; struct opa_port_error_counters64_msg { - /* Request contains first two fields, response contains the - * whole magilla */ + /* + * Request contains first two fields, response contains the + * whole magilla + */ __be64 port_select_mask[4]; __be32 vl_select_mask; @@ -2673,11 +2683,12 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, /* rsp->port_vl_xmit_time_cong is 0 for 
HFIs */ /* rsp->port_vl_xmit_wasted_bw ??? */ /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? - * does this differ from rsp->vls[vfi].port_vl_xmit_wait */ + * does this differ from rsp->vls[vfi].port_vl_xmit_wait + */ /*rsp->vls[vfi].port_vl_mark_fecn = - cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT - + offset)); - */ + * cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + * + offset)); + */ vlinfo++; vfi++; } @@ -2996,8 +3007,10 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, /* ExcessiverBufferOverrunInfo */ reg = read_csr(dd, RCV_ERR_INFO); if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) { - /* if the RcvExcessBufferOverrun bit is set, save SC of - * first pkt that encountered an excess buffer overrun */ + /* + * if the RcvExcessBufferOverrun bit is set, save SC of + * first pkt that encountered an excess buffer overrun + */ u8 tmp = (u8)reg; tmp &= RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK; @@ -3093,8 +3106,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0); /* Only applicable for switch */ - /*if (counter_select & CS_PORT_MARK_FECN) - write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);*/ + /* if (counter_select & CS_PORT_MARK_FECN) + * write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0); + */ if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS) write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0); @@ -3167,9 +3181,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, if (counter_select & CS_PORT_RCV_BUBBLE) write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0); - /*if (counter_select & CS_PORT_MARK_FECN) - write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0); - */ + /* if (counter_select & CS_PORT_MARK_FECN) + * write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0); + */ /* port_vl_xmit_discards ??? */ } @@ -3226,8 +3240,10 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, /* ExcessiverBufferOverrunInfo */ if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO) - /* status bit is essentially kept in the h/w - bit 5 of - * RCV_ERR_INFO */ + /* + * status bit is essentially kept in the h/w - bit 5 of + * RCV_ERR_INFO + */ write_csr(dd, RCV_ERR_INFO, RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK); diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/staging/rdma/hfi1/mad.h index f9e93c0..9ebaaf9 100644 --- a/drivers/staging/rdma/hfi1/mad.h +++ b/drivers/staging/rdma/hfi1/mad.h @@ -51,8 +51,10 @@ #define _HFI1_MAD_H #include -#define USE_PI_LED_ENABLE 1 /* use led enabled bit in struct - * opa_port_states, if available */ +#define USE_PI_LED_ENABLE 1 /* + * use led enabled bit in struct + * opa_port_states, if available + */ #include #include #ifndef PI_LED_ENABLE_SUP diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index b169166..4d9fd3b 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -284,9 +284,11 @@ static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt, struct msix_entry *msix_entry; int i; - /* We can't pass hfi1_msix_entry array to msix_setup + /* + * We can't pass hfi1_msix_entry array to msix_setup * so use a dummy msix_entry array and copy the allocated - * irq back to the hfi1_msix_entry array. */ + * irq back to the hfi1_msix_entry array. 
+ */ msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL); if (!msix_entry) { ret = -ENOMEM; diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index b0a2a45..191b260 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -177,8 +177,10 @@ static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = { /* memory pool information, used when calculating final sizes */ struct mem_pool_info { - int centipercent; /* 100th of 1% of memory to use, -1 if blocks - already set */ + int centipercent; /* + * 100th of 1% of memory to use, -1 if blocks + * already set + */ int count; /* count of contexts in the pool */ int blocks; /* block size of the pool */ int size; /* context size, in blocks */ @@ -1429,8 +1431,10 @@ retry: next = head + 1; if (next >= sc->sr_size) next = 0; - /* update the head - must be last! - the releaser can look at fields - in pbuf once we move the head */ + /* + * update the head - must be last! - the releaser can look at fields + * in pbuf once we move the head + */ smp_wmb(); sc->sr_head = next; spin_unlock_irqrestore(&sc->alloc_lock, flags); diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/staging/rdma/hfi1/pio_copy.c index dc0c178..6f97d22 100644 --- a/drivers/staging/rdma/hfi1/pio_copy.c +++ b/drivers/staging/rdma/hfi1/pio_copy.c @@ -86,8 +86,10 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, dend = dest + ((count >> 1) * sizeof(u64)); if (dend < send) { - /* all QWORD data is within the SOP block, does *not* - reach the end of the SOP block */ + /* + * all QWORD data is within the SOP block, does *not* + * reach the end of the SOP block + */ while (dest < dend) { writeq(*(u64 *)from, dest); @@ -152,8 +154,10 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, writeq(val.val64, dest); dest += sizeof(u64); } - /* fill in rest of block, no need to check pbuf->end - as we only wrap on a block boundary */ + /* + * fill in rest of block, no need to check pbuf->end + * as we only wrap on a block boundary + */ while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) { writeq(0, dest); dest += sizeof(u64); @@ -466,8 +470,10 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, dend = dest + ((nbytes >> 3) * sizeof(u64)); if (dend < send) { - /* all QWORD data is within the SOP block, does *not* - reach the end of the SOP block */ + /* + * all QWORD data is within the SOP block, does *not* + * reach the end of the SOP block + */ while (dest < dend) { writeq(*(u64 *)from, dest); @@ -562,8 +568,10 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) void __iomem *send; /* SOP end */ void __iomem *xend; - /* calculate the end of data or end of block, whichever - comes first */ + /* + * calculate the end of data or end of block, whichever + * comes first + */ send = pbuf->start + PIO_BLOCK_SIZE; xend = send < dend ? send : dend; @@ -656,8 +664,10 @@ static void mid_copy_straight(struct pio_buf *pbuf, void __iomem *send; /* SOP end */ void __iomem *xend; - /* calculate the end of data or end of block, whichever - comes first */ + /* + * calculate the end of data or end of block, whichever + * comes first + */ send = pbuf->start + PIO_BLOCK_SIZE; xend = send < dend ? 
send : dend; diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/staging/rdma/hfi1/platform.h index cc280cc..1f41bdc 100644 --- a/drivers/staging/rdma/hfi1/platform.h +++ b/drivers/staging/rdma/hfi1/platform.h @@ -186,9 +186,9 @@ static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { */ /* - *===================================================== + * ===================================================== * System table encodings - *==================================================== + * ===================================================== */ #define PLATFORM_CONFIG_MAGIC_NUM 0x3d4f5041 #define PLATFORM_CONFIG_MAGIC_NUMBER_LEN 4 @@ -208,9 +208,9 @@ enum platform_config_qsfp_power_class_encoding { }; /* - *===================================================== + * ==================================================== * Port table encodings - *==================================================== + * ==================================================== */ enum platform_config_port_type_encoding { PORT_TYPE_UNKNOWN, diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index cd818de..5f62d02 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -2219,7 +2219,8 @@ static void __sdma_process_event(struct sdma_engine *sde, * of link up, then we need to start up. * This can happen when hw down is requested while * bringing the link up with traffic active on - * 7220, e.g. */ + * 7220, e.g. + */ ss->go_s99_running = 1; /* fall through and start dma engine */ case sdma_event_e10_go_hw_start: diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 097d278..b6d0926 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -179,8 +179,10 @@ struct user_sdma_iovec { unsigned npages; /* array of pinned pages for this vector */ struct page **pages; - /* offset into the virtual address space of the vector at - * which we last left off. */ + /* + * offset into the virtual address space of the vector at + * which we last left off. + */ u64 offset; }; @@ -596,8 +598,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, } req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]); - /* Calculate the initial TID offset based on the values of - KDETH.OFFSET and KDETH.OM that are passed in. */ + /* + * Calculate the initial TID offset based on the values of + * KDETH.OFFSET and KDETH.OM that are passed in. + */ req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) * (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ? KDETH_OM_LARGE : KDETH_OM_SMALL); @@ -742,8 +746,10 @@ static inline u32 compute_data_length(struct user_sdma_request *req, } else if (req_opcode(req->info.ctrl) == EXPECTED) { u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) * PAGE_SIZE; - /* Get the data length based on the remaining space in the - * TID pair. */ + /* + * Get the data length based on the remaining space in the + * TID pair. + */ len = min(tidlen - req->tidoffset, (u32)req->info.fragsize); /* If we've filled up the TID pair, move to the next one. 
*/ if (unlikely(!len) && ++req->tididx < req->n_tids && @@ -753,9 +759,11 @@ static inline u32 compute_data_length(struct user_sdma_request *req, req->tidoffset = 0; len = min_t(u32, tidlen, req->info.fragsize); } - /* Since the TID pairs map entire pages, make sure that we + /* + * Since the TID pairs map entire pages, make sure that we * are not going to try to send more data that we have - * remaining. */ + * remaining. + */ len = min(len, req->data_len - req->sent); } else len = min(req->data_len - req->sent, (u32)req->info.fragsize); @@ -979,8 +987,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) req->sent += data_sent; if (req->data_len) { tx->iovecs[tx->idx].vec->offset += iov_offset; - /* If we've reached the end of the io vector, mark it - * so the callback can unpin the pages and free it. */ + /* + * If we've reached the end of the io vector, mark it + * so the callback can unpin the pages and free it. + */ if (tx->iovecs[tx->idx].vec->offset == tx->iovecs[tx->idx].vec->iov.iov_len) tx->iovecs[tx->idx].flags |= @@ -1216,8 +1226,10 @@ static int set_txreq_header(struct user_sdma_request *req, if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) * PAGE_SIZE)) { req->tidoffset = 0; - /* Since we don't copy all the TIDs, all at once, - * we have to check again. */ + /* + * Since we don't copy all the TIDs, all at once, + * we have to check again. + */ if (++req->tididx > req->n_tids - 1 || !req->tids[req->tididx]) { return -EINVAL; @@ -1298,8 +1310,10 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) * PAGE_SIZE)) { req->tidoffset = 0; - /* Since we don't copy all the TIDs, all at once, - * we have to check again. */ + /* + * Since we don't copy all the TIDs, all at once, + * we have to check again. 
+ */ if (++req->tididx > req->n_tids - 1 || !req->tids[req->tididx]) { return -EINVAL; -- cgit v0.10.2 From 17fb4f2923d7fc7ee778dedc0aa60ab6f402f56c Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:21:52 -0800 Subject: staging/rdma/hfi1: Fix code alignment Fix code alignment to fix checkpatch check: CHECK: Alignment should match open parenthesis Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 8e84060..ce61883 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -944,7 +944,7 @@ static struct flag_table dc8051_err_flags[] = { FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)), FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)), FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES", - D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)), + D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)), FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)), }; @@ -958,7 +958,7 @@ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME), FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET), FLAG_ENTRY0("Serdes internal loopback failure", - FAILED_SERDES_INTERNAL_LOOPBACK), + FAILED_SERDES_INTERNAL_LOOPBACK), FLAG_ENTRY0("Failed SerDes init", FAILED_SERDES_INIT), FLAG_ENTRY0("Failed LNI(Polling)", FAILED_LNI_POLLING), FLAG_ENTRY0("Failed LNI(Debounce)", FAILED_LNI_DEBOUNCE), @@ -1147,11 +1147,8 @@ struct cntr_entry { /* * accessor for stat element, context either dd or ppd */ - u64 (*rw_cntr)(const struct cntr_entry *, - void *context, - int vl, - int mode, - u64 data); + u64 (*rw_cntr)(const struct cntr_entry *, void *context, int vl, + int mode, u64 data); }; #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0 @@ -1300,7 +1297,7 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, /* Dev Access */ static u64 dev_access_u32_csr(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; u64 csr = entry->csr; @@ -1358,7 +1355,7 @@ static u64 access_sde_progress_int_cnt(const struct cntr_entry *entry, } static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; @@ -1379,7 +1376,7 @@ static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context, } static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; u32 csr = entry->csr; @@ -1403,7 +1400,7 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, /* Port Access */ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; @@ -1413,7 +1410,7 @@ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context, } static u64 port_access_u64_csr(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; u64 val; @@ -1453,7 +1450,7 @@ static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode, } static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 
data) + int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; @@ -1463,7 +1460,7 @@ static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context, } static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; @@ -1484,7 +1481,7 @@ static u64 access_sw_unknown_frame_cnt(const struct cntr_entry *entry, } static u64 access_sw_xmit_discards(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; u64 zero = 0; @@ -1501,7 +1498,8 @@ static u64 access_sw_xmit_discards(const struct cntr_entry *entry, } static u64 access_xmit_constraint_errs(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, + u64 data) { struct hfi1_pportdata *ppd = context; @@ -1513,7 +1511,7 @@ static u64 access_xmit_constraint_errs(const struct cntr_entry *entry, } static u64 access_rcv_constraint_errs(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; @@ -1569,7 +1567,7 @@ static u64 access_sw_cpu_intr(const struct cntr_entry *entry, } static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; @@ -1610,7 +1608,7 @@ static u64 access_sw_kmem_wait(const struct cntr_entry *entry, } static u64 access_sw_send_schedule(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = (struct hfi1_devdata *)context; @@ -4965,28 +4963,28 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = { [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL), [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH), [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT, - CNTR_SYNTH | CNTR_VL), + CNTR_SYNTH | CNTR_VL), [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT, - CNTR_SYNTH | CNTR_VL), + CNTR_SYNTH | CNTR_VL), [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT, - CNTR_SYNTH | CNTR_VL), + CNTR_SYNTH | CNTR_VL), [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL), [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL), [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT, - access_sw_link_dn_cnt), + access_sw_link_dn_cnt), [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT, - access_sw_link_up_cnt), + access_sw_link_up_cnt), [C_SW_UNKNOWN_FRAME] = CNTR_ELEM("UnknownFrame", 0, 0, CNTR_NORMAL, access_sw_unknown_frame_cnt), [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT, - access_sw_xmit_discards), + access_sw_xmit_discards), [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0, - CNTR_SYNTH | CNTR_32BIT | CNTR_VL, - access_sw_xmit_discards), + CNTR_SYNTH | CNTR_32BIT | CNTR_VL, + access_sw_xmit_discards), [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH, - access_xmit_constraint_errs), + access_xmit_constraint_errs), [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH, - access_rcv_constraint_errs), + access_rcv_constraint_errs), [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, 
loop_pkts), [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends), [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks), @@ -5002,9 +5000,9 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = { [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL, access_sw_cpu_rc_acks), [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL, - access_sw_cpu_rc_qacks), + access_sw_cpu_rc_qacks), [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL, - access_sw_cpu_rc_delayed_comp), + access_sw_cpu_rc_delayed_comp), [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1), [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3), [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5), @@ -5153,7 +5151,7 @@ done: * the buffer. End in '*' if the buffer is too short. */ static char *flag_string(char *buf, int buf_len, u64 flags, - struct flag_table *table, int table_size) + struct flag_table *table, int table_size) { char extra[32]; char *p = buf; @@ -5214,10 +5212,8 @@ static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source) if (source < ARRAY_SIZE(cce_misc_names)) strncpy(buf, cce_misc_names[source], bsize); else - snprintf(buf, - bsize, - "Reserved%u", - source + IS_GENERAL_ERR_START); + snprintf(buf, bsize, "Reserved%u", + source + IS_GENERAL_ERR_START); return buf; } @@ -5341,51 +5337,56 @@ static char *is_reserved_name(char *buf, size_t bsize, unsigned int source) static char *cce_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags)); + cce_err_status_flags, + ARRAY_SIZE(cce_err_status_flags)); } static char *rxe_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags)); + rxe_err_status_flags, + ARRAY_SIZE(rxe_err_status_flags)); } static char *misc_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, misc_err_status_flags, - ARRAY_SIZE(misc_err_status_flags)); + ARRAY_SIZE(misc_err_status_flags)); } static char *pio_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags)); + pio_err_status_flags, + ARRAY_SIZE(pio_err_status_flags)); } static char *sdma_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - sdma_err_status_flags, - ARRAY_SIZE(sdma_err_status_flags)); + sdma_err_status_flags, + ARRAY_SIZE(sdma_err_status_flags)); } static char *egress_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags)); + egress_err_status_flags, + ARRAY_SIZE(egress_err_status_flags)); } static char *egress_err_info_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags)); + egress_err_info_flags, + ARRAY_SIZE(egress_err_info_flags)); } static char *send_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - send_err_status_flags, - ARRAY_SIZE(send_err_status_flags)); + send_err_status_flags, + ARRAY_SIZE(send_err_status_flags)); } static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg) @@ -5398,7 +5399,7 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg) * report or record it. 
*/ dd_dev_info(dd, "CCE Error: %s\n", - cce_err_status_string(buf, sizeof(buf), reg)); + cce_err_status_string(buf, sizeof(buf), reg)); if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) && is_ax(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) { @@ -5428,11 +5429,11 @@ static void update_rcverr_timer(unsigned long opaque) u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL); if (dd->rcv_ovfl_cnt < cur_ovfl_cnt && - ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) { + ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) { dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__); - set_link_down_reason(ppd, - OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0, - OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); + set_link_down_reason( + ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0, + OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); } dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt; @@ -5461,7 +5462,7 @@ static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "Receive Error: %s\n", - rxe_err_status_string(buf, sizeof(buf), reg)); + rxe_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_RXE_FREEZE_ERR) { int flags = 0; @@ -5488,7 +5489,7 @@ static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "Misc Error: %s", - misc_err_status_string(buf, sizeof(buf), reg)); + misc_err_status_string(buf, sizeof(buf), reg)); for (i = 0; i < NUM_MISC_ERR_STATUS_COUNTERS; i++) { if (reg & (1ull << i)) incr_cntr64(&dd->misc_err_status_cnt[i]); @@ -5501,7 +5502,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "PIO Error: %s\n", - pio_err_status_string(buf, sizeof(buf), reg)); + pio_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_PIO_FREEZE_ERR) start_freeze_handling(dd->pport, 0); @@ -5518,7 +5519,7 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "SDMA Error: %s\n", - sdma_err_status_string(buf, sizeof(buf), reg)); + sdma_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_SDMA_FREEZE_ERR) start_freeze_handling(dd->pport, 0); @@ -5560,8 +5561,8 @@ static void handle_send_egress_err_info(struct hfi1_devdata *dd, write_csr(dd, SEND_EGRESS_ERR_INFO, info); dd_dev_info(dd, - "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n", - info, egress_err_info_string(buf, sizeof(buf), info), src); + "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n", + info, egress_err_info_string(buf, sizeof(buf), info), src); /* Eventually add other counters for each bit */ if (info & PORT_DISCARD_EGRESS_ERRS) { @@ -5699,7 +5700,7 @@ static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg) if (reg) dd_dev_info(dd, "Egress Error: %s\n", - egress_err_status_string(buf, sizeof(buf), reg)); + egress_err_status_string(buf, sizeof(buf), reg)); for (i = 0; i < NUM_SEND_EGRESS_ERR_STATUS_COUNTERS; i++) { if (reg & (1ull << i)) @@ -5713,7 +5714,7 @@ static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "Send Error: %s\n", - send_err_status_string(buf, sizeof(buf), reg)); + send_err_status_string(buf, sizeof(buf), reg)); for (i = 0; i < NUM_SEND_ERR_STATUS_COUNTERS; i++) { if (reg & (1ull << i)) @@ -5759,7 +5760,7 @@ static void interrupt_clear_down(struct hfi1_devdata *dd, u64 mask; dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n", - eri->desc, reg); + eri->desc, reg); /* * 
Read-modify-write so any other masked bits * remain masked. @@ -5783,14 +5784,15 @@ static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source) interrupt_clear_down(dd, 0, eri); } else { dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n", - source); + source); } } static char *send_context_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags)); + sc_err_status_flags, + ARRAY_SIZE(sc_err_status_flags)); } /* @@ -5815,15 +5817,15 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, sw_index = dd->hw_to_sw[hw_context]; if (sw_index >= dd->num_send_contexts) { dd_dev_err(dd, - "out of range sw index %u for send context %u\n", - sw_index, hw_context); + "out of range sw index %u for send context %u\n", + sw_index, hw_context); return; } sci = &dd->send_contexts[sw_index]; sc = sci->sc; if (!sc) { dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__, - sw_index, hw_context); + sw_index, hw_context); return; } @@ -5833,7 +5835,8 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS); dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context, - send_context_err_status_string(flags, sizeof(flags), status)); + send_context_err_status_string(flags, sizeof(flags), + status)); if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK) handle_send_egress_err_info(dd, sc_to_vl(dd, sw_index)); @@ -5918,8 +5921,8 @@ static void is_various_int(struct hfi1_devdata *dd, unsigned int source) interrupt_clear_down(dd, 0, eri); else dd_dev_info(dd, - "%s: Unimplemented/reserved interrupt %d\n", - __func__, source); + "%s: Unimplemented/reserved interrupt %d\n", + __func__, source); } static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) @@ -5931,7 +5934,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) if (reg & QSFP_HFI0_MODPRST_N) { dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n", - __func__); + __func__); if (!qsfp_mod_present(ppd)) { ppd->driver_link_ready = 0; @@ -5949,7 +5952,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) ppd->qsfp_info.reset_needed = 0; ppd->qsfp_info.limiting_active = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, - flags); + flags); /* Invert the ModPresent pin now to detect plug-in */ write_csr(dd, dd->hfi1_id ? 
ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, qsfp_int_mgmt); @@ -5977,7 +5980,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) ppd->qsfp_info.cache_valid = 0; ppd->qsfp_info.cache_refresh_required = 1; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, - flags); + flags); /* * Stop inversion of ModPresent pin to detect @@ -5994,7 +5997,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) if (reg & QSFP_HFI0_INT_N) { dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n", - __func__); + __func__); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.check_interrupt_flags = 1; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); @@ -6010,11 +6013,11 @@ static int request_host_lcb_access(struct hfi1_devdata *dd) int ret; ret = do_8051_command(dd, HCMD_MISC, - (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, - NULL); + (u64)HCMD_MISC_REQUEST_LCB_ACCESS << + LOAD_DATA_FIELD_ID_SHIFT, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "%s: command failed with error %d\n", - __func__, ret); + __func__, ret); } return ret == HCMD_SUCCESS ? 0 : -EBUSY; } @@ -6024,11 +6027,11 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd) int ret; ret = do_8051_command(dd, HCMD_MISC, - (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, - NULL); + (u64)HCMD_MISC_GRANT_LCB_ACCESS << + LOAD_DATA_FIELD_ID_SHIFT, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "%s: command failed with error %d\n", - __func__, ret); + __func__, ret); } return ret == HCMD_SUCCESS ? 0 : -EBUSY; } @@ -6040,8 +6043,8 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd) static inline void set_host_lcb_access(struct hfi1_devdata *dd) { write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL, - DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK - | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK); + DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK | + DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK); } /* @@ -6051,7 +6054,7 @@ static inline void set_host_lcb_access(struct hfi1_devdata *dd) static inline void set_8051_lcb_access(struct hfi1_devdata *dd) { write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL, - DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK); + DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK); } /* @@ -6085,7 +6088,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) /* this access is valid only when the link is up */ if ((ppd->host_link_state & HLS_UP) == 0) { dd_dev_info(dd, "%s: link state %s not up\n", - __func__, link_state_name(ppd->host_link_state)); + __func__, link_state_name(ppd->host_link_state)); ret = -EBUSY; goto done; } @@ -6094,8 +6097,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) ret = request_host_lcb_access(dd); if (ret) { dd_dev_err(dd, - "%s: unable to acquire LCB access, err %d\n", - __func__, ret); + "%s: unable to acquire LCB access, err %d\n", + __func__, ret); goto done; } set_host_lcb_access(dd); @@ -6132,7 +6135,7 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok) if (dd->lcb_access_count == 0) { dd_dev_err(dd, "%s: LCB access count is zero. 
Skipping.\n", - __func__); + __func__); goto done; } @@ -6141,8 +6144,8 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok) ret = request_8051_lcb_access(dd); if (ret) { dd_dev_err(dd, - "%s: unable to release LCB access, err %d\n", - __func__, ret); + "%s: unable to release LCB access, err %d\n", + __func__, ret); /* restore host access if the grant didn't work */ set_host_lcb_access(dd); goto done; @@ -6174,9 +6177,10 @@ static void init_lcb_access(struct hfi1_devdata *dd) static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data) { write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, - DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK - | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT - | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT); + DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK | + (u64)return_code << + DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT | + (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT); } /* @@ -6214,7 +6218,7 @@ void handle_8051_request(struct work_struct *work) case HREQ_SET_TX_EQ_ABS: case HREQ_SET_TX_EQ_REL: dd_dev_info(dd, "8051 request: request 0x%x not supported\n", - type); + type); hreq_response(dd, HREQ_NOT_SUPPORTED, 0); break; @@ -6272,11 +6276,11 @@ static void write_global_credit(struct hfi1_devdata *dd, u8 vau, u16 total, u16 shared) { write_csr(dd, SEND_CM_GLOBAL_CREDIT, - ((u64)total - << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) - | ((u64)shared - << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) - | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT)); + ((u64)total << + SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) | + ((u64)shared << + SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) | + ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT)); } /* @@ -6355,14 +6359,13 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) write_csr(dd, DC_LCB_CFG_RUN, 0); /* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, - 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT); + 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT); /* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */ dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN); reg = read_csr(dd, DCC_CFG_RESET); - write_csr(dd, DCC_CFG_RESET, - reg - | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) - | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT)); + write_csr(dd, DCC_CFG_RESET, reg | + (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) | + (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT)); (void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */ if (!abort) { udelay(1); /* must hold for the longer of 16cclks or 20ns */ @@ -6419,7 +6422,7 @@ static void dc_start(struct hfi1_devdata *dd) ret = wait_fm_ready(dd, TIMEOUT_8051_START); if (ret) { dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", - __func__); + __func__); } /* Take away reset for LCB and RX FPE (set in lcb_shutdown). 
*/ write_csr(dd, DCC_CFG_RESET, 0x10); @@ -6512,7 +6515,7 @@ static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd) write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr); /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, - DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK); + DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK); write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr); } @@ -6571,8 +6574,8 @@ void handle_sma_message(struct work_struct *work) break; default: dd_dev_err(dd, - "%s: received unexpected SMA idle message 0x%llx\n", - __func__, msg); + "%s: received unexpected SMA idle message 0x%llx\n", + __func__, msg); break; } } @@ -6664,10 +6667,9 @@ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze) if (time_after(jiffies, timeout)) { dd_dev_err(dd, - "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing", - freeze ? "" : "un", - reg & ALL_FROZE, - freeze ? ALL_FROZE : 0ull); + "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing", + freeze ? "" : "un", reg & ALL_FROZE, + freeze ? ALL_FROZE : 0ull); return; } usleep_range(80, 120); @@ -6792,7 +6794,7 @@ void handle_freeze(struct work_struct *work) void handle_link_up(struct work_struct *work) { struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, - link_up_work); + link_up_work); set_link_state(ppd, HLS_UP_INIT); /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */ @@ -6811,10 +6813,10 @@ void handle_link_up(struct work_struct *work) if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) { /* oops - current speed is not enabled, bounce */ dd_dev_err(ppd->dd, - "Link speed active 0x%x is outside enabled 0x%x, downing link\n", - ppd->link_speed_active, ppd->link_speed_enabled); + "Link speed active 0x%x is outside enabled 0x%x, downing link\n", + ppd->link_speed_active, ppd->link_speed_enabled); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, - OPA_LINKDOWN_REASON_SPEED_POLICY); + OPA_LINKDOWN_REASON_SPEED_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); tune_serdes(ppd); start_link(ppd); @@ -6896,7 +6898,7 @@ void handle_link_bounce(struct work_struct *work) start_link(ppd); } else { dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n", - __func__, link_state_name(ppd->host_link_state)); + __func__, link_state_name(ppd->host_link_state)); } } @@ -6993,7 +6995,7 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width) case 3: return OPA_LINK_WIDTH_3X; default: dd_dev_info(dd, "%s: invalid width %d, using 4\n", - __func__, width); + __func__, width); /* fall through */ case 4: return OPA_LINK_WIDTH_4X; } @@ -7031,7 +7033,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, /* read the active lanes */ read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion, - &rx_polarity_inversion, &max_rate); + &rx_polarity_inversion, &max_rate); read_local_lni(dd, &enable_lane_rx); /* convert to counts */ @@ -7052,8 +7054,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, break; default: dd_dev_err(dd, - "%s: unexpected max rate %d, using 25Gb\n", - __func__, (int)max_rate); + "%s: unexpected max rate %d, using 25Gb\n", + __func__, (int)max_rate); /* fall through */ case 1: dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; @@ -7062,8 +7064,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, } dd_dev_info(dd, - "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n", - enable_lane_tx, tx, enable_lane_rx, rx); + "Fabric active lanes (width): tx 
0x%x (%d), rx 0x%x (%d)\n", + enable_lane_tx, tx, enable_lane_rx, rx); *tx_width = link_width_to_bits(dd, tx); *rx_width = link_width_to_bits(dd, rx); } @@ -7166,13 +7168,8 @@ void handle_verify_cap(struct work_struct *work) */ read_vc_remote_phy(dd, &power_management, &continious); - read_vc_remote_fabric( - dd, - &vau, - &z, - &vcu, - &vl15buf, - &partner_supported_crc); + read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf, + &partner_supported_crc); read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths); read_remote_device_id(dd, &device_id, &device_rev); /* @@ -7183,19 +7180,16 @@ void handle_verify_cap(struct work_struct *work) /* print the active widths */ get_link_widths(dd, &active_tx, &active_rx); dd_dev_info(dd, - "Peer PHY: power management 0x%x, continuous updates 0x%x\n", - (int)power_management, (int)continious); + "Peer PHY: power management 0x%x, continuous updates 0x%x\n", + (int)power_management, (int)continious); dd_dev_info(dd, - "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n", - (int)vau, - (int)z, - (int)vcu, - (int)vl15buf, - (int)partner_supported_crc); + "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n", + (int)vau, (int)z, (int)vcu, (int)vl15buf, + (int)partner_supported_crc); dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n", - (u32)remote_tx_rate, (u32)link_widths); + (u32)remote_tx_rate, (u32)link_widths); dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n", - (u32)device_id, (u32)device_rev); + (u32)device_id, (u32)device_rev); /* * The peer vAU value just read is the peer receiver value. HFI does * not support a transmit vAU of 0 (AU == 8). We advertised that @@ -7230,10 +7224,10 @@ void handle_verify_cap(struct work_struct *work) reg = read_csr(dd, SEND_CM_CTRL); if (crc_val == LCB_CRC_14B && crc_14b_sideband) { write_csr(dd, SEND_CM_CTRL, - reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK); + reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK); } else { write_csr(dd, SEND_CM_CTRL, - reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK); + reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK); } ppd->link_speed_active = 0; /* invalid value */ @@ -7258,7 +7252,7 @@ void handle_verify_cap(struct work_struct *work) } if (ppd->link_speed_active == 0) { dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n", - __func__, (int)remote_tx_rate); + __func__, (int)remote_tx_rate); ppd->link_speed_active = OPA_LINK_SPEED_25G; } @@ -7314,9 +7308,9 @@ void handle_verify_cap(struct work_struct *work) read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) & DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK; dd_dev_info(dd, - "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n", - ppd->neighbor_guid, ppd->neighbor_type, - ppd->mgmt_allowed, ppd->neighbor_fm_security); + "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n", + ppd->neighbor_guid, ppd->neighbor_type, + ppd->mgmt_allowed, ppd->neighbor_fm_security); if (ppd->mgmt_allowed) add_full_mgmt_pkey(ppd); @@ -7369,27 +7363,28 @@ retry: /* downgrade is disabled */ /* bounce if not at starting active width */ - if ((ppd->link_width_active != ppd->link_width_downgrade_tx_active) || - (ppd->link_width_active != ppd->link_width_downgrade_rx_active)) { + if ((ppd->link_width_active != + ppd->link_width_downgrade_tx_active) || + (ppd->link_width_active != + ppd->link_width_downgrade_rx_active)) { dd_dev_err(ppd->dd, - "Link downgrade is disabled and link has downgraded, downing link\n"); + "Link downgrade is disabled and link has 
downgraded, downing link\n"); dd_dev_err(ppd->dd, - " original 0x%x, tx active 0x%x, rx active 0x%x\n", - ppd->link_width_active, - ppd->link_width_downgrade_tx_active, - ppd->link_width_downgrade_rx_active); + " original 0x%x, tx active 0x%x, rx active 0x%x\n", + ppd->link_width_active, + ppd->link_width_downgrade_tx_active, + ppd->link_width_downgrade_rx_active); do_bounce = 1; } } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 || (lwde & ppd->link_width_downgrade_rx_active) == 0) { /* Tx or Rx is outside the enabled policy */ dd_dev_err(ppd->dd, - "Link is outside of downgrade allowed, downing link\n"); + "Link is outside of downgrade allowed, downing link\n"); dd_dev_err(ppd->dd, - " enabled 0x%x, tx active 0x%x, rx active 0x%x\n", - lwde, - ppd->link_width_downgrade_tx_active, - ppd->link_width_downgrade_rx_active); + " enabled 0x%x, tx active 0x%x, rx active 0x%x\n", + lwde, ppd->link_width_downgrade_tx_active, + ppd->link_width_downgrade_rx_active); do_bounce = 1; } @@ -7398,7 +7393,7 @@ done: if (do_bounce) { set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0, - OPA_LINKDOWN_REASON_WIDTH_POLICY); + OPA_LINKDOWN_REASON_WIDTH_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); tune_serdes(ppd); start_link(ppd); @@ -7481,9 +7476,10 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) { queue_link_down = 1; dd_dev_info(dd, "Link error: %s\n", - dc8051_info_err_string(buf, - sizeof(buf), - err & FAILED_LNI)); + dc8051_info_err_string(buf, + sizeof(buf), + err & + FAILED_LNI)); } err &= ~(u64)FAILED_LNI; } @@ -7495,7 +7491,8 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) if (err) { /* report remaining errors, but do not do anything */ dd_dev_err(dd, "8051 info error: %s\n", - dc8051_info_err_string(buf, sizeof(buf), err)); + dc8051_info_err_string(buf, sizeof(buf), + err)); } /* @@ -7548,8 +7545,9 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) if (host_msg) { /* report remaining messages, but do not do anything */ dd_dev_info(dd, "8051 info host message: %s\n", - dc8051_info_host_msg_string(buf, sizeof(buf), - host_msg)); + dc8051_info_host_msg_string(buf, + sizeof(buf), + host_msg)); } reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK; @@ -7562,15 +7560,15 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) */ dd_dev_err(dd, "Lost 8051 heartbeat\n"); write_csr(dd, DC_DC8051_ERR_EN, - read_csr(dd, DC_DC8051_ERR_EN) - & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK); + read_csr(dd, DC_DC8051_ERR_EN) & + ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK); reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK; } if (reg) { /* report the error, but do not do anything */ dd_dev_err(dd, "8051 error: %s\n", - dc8051_err_string(buf, sizeof(buf), reg)); + dc8051_err_string(buf, sizeof(buf), reg)); } if (queue_link_down) { @@ -7582,7 +7580,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || ppd->link_enabled == 0) { dd_dev_info(dd, "%s: not queuing link down\n", - __func__); + __func__); } else { queue_work(ppd->hfi1_wq, &ppd->link_down_work); } @@ -7760,7 +7758,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) /* just report this */ dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra); dd_dev_info(dd, " hdr0 0x%llx, hdr1 0x%llx\n", - hdr0, hdr1); + hdr0, hdr1); reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK; } @@ 
-7779,7 +7777,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) /* report any remaining errors */ if (reg) dd_dev_info(dd, "DCC Error: %s\n", - dcc_err_string(buf, sizeof(buf), reg)); + dcc_err_string(buf, sizeof(buf), reg)); if (lcl_reason == 0) lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN; @@ -7796,7 +7794,7 @@ static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg) char buf[96]; dd_dev_info(dd, "LCB Error: %s\n", - lcb_err_string(buf, sizeof(buf), reg)); + lcb_err_string(buf, sizeof(buf), reg)); } /* @@ -7886,7 +7884,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) err_detail = "out of range"; } dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n", - err_detail, source); + err_detail, source); } /* @@ -7912,7 +7910,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) err_detail = "out of range"; } dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n", - err_detail, source); + err_detail, source); } /* @@ -7923,7 +7921,7 @@ static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source) char name[64]; dd_dev_err(dd, "unexpected %s interrupt\n", - is_reserved_name(name, sizeof(name), source)); + is_reserved_name(name, sizeof(name), source)); } static const struct is_table is_table[] = { @@ -8001,7 +7999,7 @@ static irqreturn_t general_interrupt(int irq, void *data) /* phase 2: call the appropriate handler */ for_each_set_bit(bit, (unsigned long *)®s[0], - CCE_NUM_INT_CSRS * 64) { + CCE_NUM_INT_CSRS * 64) { is_interrupt(dd, bit); } @@ -8024,19 +8022,19 @@ static irqreturn_t sdma_interrupt(int irq, void *data) /* This read_csr is really bad in the hot path */ status = read_csr(dd, - CCE_INT_STATUS + (8 * (IS_SDMA_START / 64))) - & sde->imask; + CCE_INT_STATUS + (8 * (IS_SDMA_START / 64))) + & sde->imask; if (likely(status)) { /* clear the interrupt(s) */ write_csr(dd, - CCE_INT_CLEAR + (8 * (IS_SDMA_START / 64)), - status); + CCE_INT_CLEAR + (8 * (IS_SDMA_START / 64)), + status); /* handle the interrupt(s) */ sdma_engine_interrupt(sde, status); } else dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n", - sde->this_idx); + sde->this_idx); return IRQ_HANDLED; } @@ -8436,8 +8434,8 @@ int load_8051_config(struct hfi1_devdata *dd, u8 field_id, ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, - "load 8051 config: field id %d, lane %d, err %d\n", - (int)field_id, (int)lane_id, ret); + "load 8051 config: field id %d, lane %d, err %d\n", + (int)field_id, (int)lane_id, ret); } return ret; } @@ -8474,7 +8472,7 @@ int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id, } else { *result = 0; dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n", - __func__, lane_id, field_id); + __func__, lane_id, field_id); } return ret; @@ -8511,7 +8509,7 @@ static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, u32 frame; read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG, - &frame); + &frame); *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK; *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK; *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK; @@ -8593,7 +8591,7 @@ static void read_vc_remote_link_width(struct hfi1_devdata *dd, u32 frame; read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG, - &frame); + &frame); *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT) & REMOTE_TX_RATE_MASK; 
*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK; @@ -8633,7 +8631,7 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality) *link_quality = 0; if (dd->pport->host_link_state & HLS_UP) { ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, - &frame); + &frame); if (ret == 0) *link_quality = (frame >> LINK_QUALITY_SHIFT) & LINK_QUALITY_MASK; @@ -8693,10 +8691,9 @@ static void check_fabric_firmware_versions(struct hfi1_devdata *dd) for (lane = 0; lane < 4; lane++) { ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame); if (ret) { - dd_dev_err( - dd, - "Unable to read lane %d firmware details\n", - lane); + dd_dev_err(dd, + "Unable to read lane %d firmware details\n", + lane); continue; } version = (frame >> SPICO_ROM_VERSION_SHIFT) @@ -8704,8 +8701,8 @@ static void check_fabric_firmware_versions(struct hfi1_devdata *dd) prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT) & SPICO_ROM_PROD_ID_MASK; dd_dev_info(dd, - "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n", - lane, version, prod_id); + "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n", + lane, version, prod_id); } } @@ -8718,11 +8715,10 @@ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out) { int ret; - ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG, - type, data_out); + ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG, type, data_out); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "read idle message: type %d, err %d\n", - (u32)type, ret); + (u32)type, ret); return -EINVAL; } dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out); @@ -8739,8 +8735,8 @@ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out) */ static int read_idle_sma(struct hfi1_devdata *dd, u64 *data) { - return read_idle_message(dd, - (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data); + return read_idle_message(dd, (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, + data); } /* @@ -8756,7 +8752,7 @@ static int send_idle_message(struct hfi1_devdata *dd, u64 data) ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n", - data, ret); + data, ret); return -EINVAL; } return 0; @@ -8771,8 +8767,8 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message) { u64 data; - data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT) - | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT); + data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT) | + ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT); return send_idle_message(dd, data); } @@ -8794,7 +8790,7 @@ static int do_quick_linkup(struct hfi1_devdata *dd) /* LCB_CFG_LOOPBACK.VAL = 2 */ /* LCB_CFG_LANE_WIDTH.VAL = 0 */ write_csr(dd, DC_LCB_CFG_LOOPBACK, - IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT); + IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT); write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0); } @@ -8806,25 +8802,24 @@ static int do_quick_linkup(struct hfi1_devdata *dd) if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { /* LCB_CFG_RUN.EN = 1 */ write_csr(dd, DC_LCB_CFG_RUN, - 1ull << DC_LCB_CFG_RUN_EN_SHIFT); + 1ull << DC_LCB_CFG_RUN_EN_SHIFT); /* watch LCB_STS_LINK_TRANSFER_ACTIVE */ timeout = jiffies + msecs_to_jiffies(10); while (1) { - reg = read_csr(dd, - DC_LCB_STS_LINK_TRANSFER_ACTIVE); + reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE); if (reg) break; if (time_after(jiffies, timeout)) { dd_dev_err(dd, - "timeout waiting for LINK_TRANSFER_ACTIVE\n"); + "timeout waiting for LINK_TRANSFER_ACTIVE\n"); return -ETIMEDOUT; } udelay(2); } 
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, - 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT); + 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT); } if (!loopback) { @@ -8836,10 +8831,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd) * done with LCB set up before resuming. */ dd_dev_err(dd, - "Pausing for peer to be finished with LCB set up\n"); + "Pausing for peer to be finished with LCB set up\n"); msleep(5000); - dd_dev_err(dd, - "Continuing with quick linkup\n"); + dd_dev_err(dd, "Continuing with quick linkup\n"); } write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */ @@ -8853,8 +8847,8 @@ static int do_quick_linkup(struct hfi1_devdata *dd) ret = set_physical_link_state(dd, PLS_QUICK_LINKUP); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, - "%s: set physical link state to quick LinkUp failed with return %d\n", - __func__, ret); + "%s: set physical link state to quick LinkUp failed with return %d\n", + __func__, ret); set_host_lcb_access(dd); write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */ @@ -8879,8 +8873,8 @@ static int set_serdes_loopback_mode(struct hfi1_devdata *dd) if (ret == HCMD_SUCCESS) return 0; dd_dev_err(dd, - "Set physical link state to SerDes Loopback failed with return %d\n", - ret); + "Set physical link state to SerDes Loopback failed with return %d\n", + ret); if (ret >= 0) ret = -EINVAL; return ret; @@ -8895,7 +8889,7 @@ static int init_loopback(struct hfi1_devdata *dd) /* all loopbacks should disable self GUID check */ write_csr(dd, DC_DC8051_CFG_MODE, - (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK)); + (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK)); /* * The simulator has only one loopback option - LCB. Switch @@ -8926,7 +8920,7 @@ static int init_loopback(struct hfi1_devdata *dd) /* not supported in emulation due to emulation RTL changes */ if (dd->icode == ICODE_FPGA_EMULATION) { dd_dev_err(dd, - "LCB loopback not supported in emulation\n"); + "LCB loopback not supported in emulation\n"); return -EINVAL; } return 0; @@ -8982,7 +8976,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) /* set the local tx rate - need to read-modify-write */ ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion, - &rx_polarity_inversion, &ppd->local_tx_rate); + &rx_polarity_inversion, &ppd->local_tx_rate); if (ret) goto set_local_link_attributes_fail; @@ -9003,15 +8997,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) enable_lane_tx = 0xF; /* enable all four lanes */ ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion, - rx_polarity_inversion, ppd->local_tx_rate); + rx_polarity_inversion, ppd->local_tx_rate); if (ret != HCMD_SUCCESS) goto set_local_link_attributes_fail; /* * DC supports continuous updates. 
*/ - ret = write_vc_local_phy(dd, 0 /* no power management */, - 1 /* continuous updates */); + ret = write_vc_local_phy(dd, + 0 /* no power management */, + 1 /* continuous updates */); if (ret != HCMD_SUCCESS) goto set_local_link_attributes_fail; @@ -9022,7 +9017,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) goto set_local_link_attributes_fail; ret = write_vc_local_link_width(dd, 0, 0, - opa_to_vc_link_widths(ppd->link_width_enabled)); + opa_to_vc_link_widths( + ppd->link_width_enabled)); if (ret != HCMD_SUCCESS) goto set_local_link_attributes_fail; @@ -9033,8 +9029,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) set_local_link_attributes_fail: dd_dev_err(dd, - "Failed to set local link attributes, return 0x%x\n", - ret); + "Failed to set local link attributes, return 0x%x\n", + ret); return ret; } @@ -9047,25 +9043,25 @@ int start_link(struct hfi1_pportdata *ppd) { if (!ppd->link_enabled) { dd_dev_info(ppd->dd, - "%s: stopping link start because link is disabled\n", - __func__); + "%s: stopping link start because link is disabled\n", + __func__); return 0; } if (!ppd->driver_link_ready) { dd_dev_info(ppd->dd, - "%s: stopping link start because driver is not ready\n", - __func__); + "%s: stopping link start because driver is not ready\n", + __func__); return 0; } if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES || - loopback == LOOPBACK_LCB || - ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) + loopback == LOOPBACK_LCB || + ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) return set_link_state(ppd, HLS_DN_POLL); dd_dev_info(ppd->dd, - "%s: stopping link start because no cable is present\n", - __func__); + "%s: stopping link start because no cable is present\n", + __func__); return -EAGAIN; } @@ -9121,20 +9117,19 @@ void reset_qsfp(struct hfi1_pportdata *ppd) mask = (u64)QSFP_HFI0_RESET_N; qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE); qsfp_mask |= mask; - write_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask); + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask); - qsfp_mask = read_csr(dd, dd->hfi1_id ? - ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); + qsfp_mask = read_csr(dd, + dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); qsfp_mask &= ~mask; write_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask); + dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask); udelay(10); qsfp_mask |= mask; write_csr(dd, - dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask); + dd->hfi1_id ? 
ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask); wait_for_qsfp_init(ppd); @@ -9151,102 +9146,86 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, struct hfi1_devdata *dd = ppd->dd; if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) || - (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING)) - dd_dev_info(dd, - "%s: QSFP cable on fire\n", - __func__); + (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING)) + dd_dev_info(dd, "%s: QSFP cable on fire\n", + __func__); if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) || - (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING)) - dd_dev_info(dd, - "%s: QSFP cable temperature too low\n", - __func__); + (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING)) + dd_dev_info(dd, "%s: QSFP cable temperature too low\n", + __func__); if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || - (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) - dd_dev_info(dd, - "%s: QSFP supply voltage too high\n", - __func__); + (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) + dd_dev_info(dd, "%s: QSFP supply voltage too high\n", + __func__); if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) || - (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING)) - dd_dev_info(dd, - "%s: QSFP supply voltage too low\n", - __func__); + (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING)) + dd_dev_info(dd, "%s: QSFP supply voltage too low\n", + __func__); /* Byte 2 is vendor specific */ if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) || - (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, - "%s: Cable RX channel 1/2 power too high\n", - __func__); + (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING)) + dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) || - (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, - "%s: Cable RX channel 1/2 power too low\n", - __func__); + (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING)) + dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) || - (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, - "%s: Cable RX channel 3/4 power too high\n", - __func__); + (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING)) + dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) || - (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, - "%s: Cable RX channel 3/4 power too low\n", - __func__); + (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING)) + dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) || - (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, - "%s: Cable TX channel 1/2 bias too high\n", - __func__); + (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING)) + dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) || - (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, - "%s: Cable TX channel 1/2 bias too low\n", - __func__); + (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING)) + dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) || - (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, - "%s: Cable TX channel 3/4 bias too high\n", - __func__); + 
(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING)) + dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) || - (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, - "%s: Cable TX channel 3/4 bias too low\n", - __func__); + (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING)) + dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) || - (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, - "%s: Cable TX channel 1/2 power too high\n", - __func__); + (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING)) + dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) || - (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, - "%s: Cable TX channel 1/2 power too low\n", - __func__); + (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING)) + dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) || - (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, - "%s: Cable TX channel 3/4 power too high\n", - __func__); + (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING)) + dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) || - (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, - "%s: Cable TX channel 3/4 power too low\n", - __func__); + (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING)) + dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n", + __func__); /* Bytes 9-10 and 11-12 are reserved */ /* Bytes 13-15 are vendor specific */ @@ -9298,8 +9277,8 @@ void qsfp_event(struct work_struct *work) if (qsfp_read(ppd, dd->hfi1_id, 6, &qsfp_interrupt_status[0], 16) != 16) { dd_dev_info(dd, - "%s: Failed to read status of QSFP module\n", - __func__); + "%s: Failed to read status of QSFP module\n", + __func__); } else { unsigned long flags; @@ -9308,7 +9287,7 @@ void qsfp_event(struct work_struct *work) spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.check_interrupt_flags = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, - flags); + flags); } } } @@ -9430,7 +9409,7 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0, - OPA_LINKDOWN_REASON_SMA_DISABLED); + OPA_LINKDOWN_REASON_SMA_DISABLED); set_link_state(ppd, HLS_DN_OFFLINE); /* disable the port */ @@ -9486,8 +9465,8 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, pa = 0; } else if (type > PT_INVALID) { dd_dev_err(dd, - "unexpected receive array type %u for index %u, not handled\n", - type, index); + "unexpected receive array type %u for index %u, not handled\n", + type, index); goto done; } @@ -9702,12 +9681,15 @@ static void set_send_length(struct hfi1_pportdata *ppd) /* all kernel receive contexts have the same hdrqentsize */ for (i = 0; i < ppd->vls_supported; i++) { sc_set_cr_threshold(dd->vld[i].sc, - sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu, - dd->rcd[0]->rcvhdrqentsize)); + sc_mtu_to_threshold(dd->vld[i].sc, + dd->vld[i].mtu, + dd->rcd[0]-> + rcvhdrqentsize)); } sc_set_cr_threshold(dd->vld[15].sc, - sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu, - dd->rcd[0]->rcvhdrqentsize)); + 
sc_mtu_to_threshold(dd->vld[15].sc, + dd->vld[15].mtu, + dd->rcd[0]->rcvhdrqentsize)); /* Adjust maximum MTU for the port in DC */ dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 : @@ -9768,8 +9750,8 @@ static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs) break; if (time_after(jiffies, timeout)) { dd_dev_err(dd, - "timeout waiting for phy link state 0x%x, current state is 0x%x\n", - state, curr_state); + "timeout waiting for phy link state 0x%x, current state is 0x%x\n", + state, curr_state); return -ETIMEDOUT; } usleep_range(1950, 2050); /* sleep 2ms-ish */ @@ -9812,12 +9794,12 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (do_transition) { ret = set_physical_link_state(dd, - PLS_OFFLINE | (rem_reason << 8)); + PLS_OFFLINE | (rem_reason << 8)); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, - "Failed to transition to Offline link state, return %d\n", - ret); + "Failed to transition to Offline link state, return %d\n", + ret); return -EINVAL; } if (ppd->offline_disabled_reason == @@ -9862,7 +9844,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) ret = wait_fm_ready(dd, 7000); if (ret) { dd_dev_err(dd, - "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n"); + "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n"); /* state is really offline, so make it so */ ppd->host_link_state = HLS_DN_OFFLINE; return ret; @@ -9885,8 +9867,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) read_last_local_state(dd, &last_local_state); read_last_remote_state(dd, &last_remote_state); dd_dev_err(dd, - "LNI failure last states: local 0x%08x, remote 0x%08x\n", - last_local_state, last_remote_state); + "LNI failure last states: local 0x%08x, remote 0x%08x\n", + last_local_state, last_remote_state); } /* the active link width (downgrade) is 0 on link down */ @@ -10038,10 +10020,10 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) state == HLS_DN_POLL; dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__, - link_state_name(ppd->host_link_state), - link_state_name(orig_new_state), - poll_bounce ? "(bounce) " : "", - link_state_reason_name(ppd, state)); + link_state_name(ppd->host_link_state), + link_state_name(orig_new_state), + poll_bounce ? "(bounce) " : "", + link_state_reason_name(ppd, state)); was_up = !!(ppd->host_link_state & HLS_UP); @@ -10071,7 +10053,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) * simulator jumps from polling to link up. * Accept that here. 
*/ - /* OK */; + /* OK */ } else if (ppd->host_link_state != HLS_GOING_UP) { goto unexpected; } @@ -10082,8 +10064,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) /* logical state didn't change, stay at going_up */ ppd->host_link_state = HLS_GOING_UP; dd_dev_err(dd, - "%s: logical state did not change to INIT\n", - __func__); + "%s: logical state did not change to INIT\n", + __func__); } else { /* clear old transient LINKINIT_REASON code */ if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR) @@ -10107,8 +10089,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) /* logical state didn't change, stay at init */ ppd->host_link_state = HLS_UP_INIT; dd_dev_err(dd, - "%s: logical state did not change to ARMED\n", - __func__); + "%s: logical state did not change to ARMED\n", + __func__); } /* * The simulator does not currently implement SMA messages, @@ -10129,8 +10111,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) /* logical state didn't change, stay at armed */ ppd->host_link_state = HLS_UP_ARMED; dd_dev_err(dd, - "%s: logical state did not change to ACTIVE\n", - __func__); + "%s: logical state did not change to ACTIVE\n", + __func__); } else { /* tell all engines to go running */ sdma_all_running(dd); @@ -10178,8 +10160,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret1 = set_physical_link_state(dd, PLS_POLLING); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, - "Failed to transition to Polling link state, return 0x%x\n", - ret1); + "Failed to transition to Polling link state, return 0x%x\n", + ret1); ret = -EINVAL; } } @@ -10209,8 +10191,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret1 = set_physical_link_state(dd, PLS_DISABLED); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, - "Failed to transition to Disabled link state, return 0x%x\n", - ret1); + "Failed to transition to Disabled link state, return 0x%x\n", + ret1); ret = -EINVAL; break; } @@ -10238,8 +10220,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret1 = set_physical_link_state(dd, PLS_LINKUP); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, - "Failed to transition to link up state, return 0x%x\n", - ret1); + "Failed to transition to link up state, return 0x%x\n", + ret1); ret = -EINVAL; break; } @@ -10250,7 +10232,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) case HLS_LINK_COOLDOWN: /* transient within goto_offline() */ default: dd_dev_info(dd, "%s: state 0x%x: not supported\n", - __func__, state); + __func__, state); ret = -EINVAL; break; } @@ -10270,8 +10252,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) unexpected: dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n", - __func__, link_state_name(ppd->host_link_state), - link_state_name(state)); + __func__, link_state_name(ppd->host_link_state), + link_state_name(state)); ret = -EINVAL; done: @@ -10359,8 +10341,8 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val) default: if (HFI1_CAP_IS_KSET(PRINT_UNIMPL)) dd_dev_info(ppd->dd, - "%s: which %s, val 0x%x: not implemented\n", - __func__, ib_cfg_name(which), val); + "%s: which %s, val 0x%x: not implemented\n", + __func__, ib_cfg_name(which), val); break; } return ret; @@ -10569,41 +10551,41 @@ static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems, static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp) { write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, - DC_SC_VL_VAL(15_0, - 0, dp->vlnt[0] & 0xf, - 1, dp->vlnt[1] & 0xf, - 2, dp->vlnt[2] & 0xf, - 3, dp->vlnt[3] & 0xf, - 4, 
dp->vlnt[4] & 0xf, - 5, dp->vlnt[5] & 0xf, - 6, dp->vlnt[6] & 0xf, - 7, dp->vlnt[7] & 0xf, - 8, dp->vlnt[8] & 0xf, - 9, dp->vlnt[9] & 0xf, - 10, dp->vlnt[10] & 0xf, - 11, dp->vlnt[11] & 0xf, - 12, dp->vlnt[12] & 0xf, - 13, dp->vlnt[13] & 0xf, - 14, dp->vlnt[14] & 0xf, - 15, dp->vlnt[15] & 0xf)); + DC_SC_VL_VAL(15_0, + 0, dp->vlnt[0] & 0xf, + 1, dp->vlnt[1] & 0xf, + 2, dp->vlnt[2] & 0xf, + 3, dp->vlnt[3] & 0xf, + 4, dp->vlnt[4] & 0xf, + 5, dp->vlnt[5] & 0xf, + 6, dp->vlnt[6] & 0xf, + 7, dp->vlnt[7] & 0xf, + 8, dp->vlnt[8] & 0xf, + 9, dp->vlnt[9] & 0xf, + 10, dp->vlnt[10] & 0xf, + 11, dp->vlnt[11] & 0xf, + 12, dp->vlnt[12] & 0xf, + 13, dp->vlnt[13] & 0xf, + 14, dp->vlnt[14] & 0xf, + 15, dp->vlnt[15] & 0xf)); write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, - DC_SC_VL_VAL(31_16, - 16, dp->vlnt[16] & 0xf, - 17, dp->vlnt[17] & 0xf, - 18, dp->vlnt[18] & 0xf, - 19, dp->vlnt[19] & 0xf, - 20, dp->vlnt[20] & 0xf, - 21, dp->vlnt[21] & 0xf, - 22, dp->vlnt[22] & 0xf, - 23, dp->vlnt[23] & 0xf, - 24, dp->vlnt[24] & 0xf, - 25, dp->vlnt[25] & 0xf, - 26, dp->vlnt[26] & 0xf, - 27, dp->vlnt[27] & 0xf, - 28, dp->vlnt[28] & 0xf, - 29, dp->vlnt[29] & 0xf, - 30, dp->vlnt[30] & 0xf, - 31, dp->vlnt[31] & 0xf)); + DC_SC_VL_VAL(31_16, + 16, dp->vlnt[16] & 0xf, + 17, dp->vlnt[17] & 0xf, + 18, dp->vlnt[18] & 0xf, + 19, dp->vlnt[19] & 0xf, + 20, dp->vlnt[20] & 0xf, + 21, dp->vlnt[21] & 0xf, + 22, dp->vlnt[22] & 0xf, + 23, dp->vlnt[23] & 0xf, + 24, dp->vlnt[24] & 0xf, + 25, dp->vlnt[25] & 0xf, + 26, dp->vlnt[26] & 0xf, + 27, dp->vlnt[27] & 0xf, + 28, dp->vlnt[28] & 0xf, + 29, dp->vlnt[29] & 0xf, + 30, dp->vlnt[30] & 0xf, + 31, dp->vlnt[31] & 0xf)); } static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what, @@ -10611,7 +10593,7 @@ static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what, { if (limit != 0) dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n", - what, (int)limit, idx); + what, (int)limit, idx); } /* change only the shared limit portion of SendCmGLobalCredit */ @@ -10689,14 +10671,14 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask, } dd_dev_err(dd, - "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n", - which, VL_STATUS_CLEAR_TIMEOUT, mask, reg); + "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n", + which, VL_STATUS_CLEAR_TIMEOUT, mask, reg); /* * If this occurs, it is likely there was a credit loss on the link. * The only recovery from that is a link bounce. */ dd_dev_err(dd, - "Continuing anyway. A credit loss may occur. Suggest a link bounce\n"); + "Continuing anyway. A credit loss may occur. Suggest a link bounce\n"); } /* @@ -10763,9 +10745,9 @@ int set_buffer_control(struct hfi1_pportdata *ppd, continue; } nonzero_msg(dd, i, "dedicated", - be16_to_cpu(new_bc->vl[i].dedicated)); + be16_to_cpu(new_bc->vl[i].dedicated)); nonzero_msg(dd, i, "shared", - be16_to_cpu(new_bc->vl[i].shared)); + be16_to_cpu(new_bc->vl[i].shared)); new_bc->vl[i].dedicated = 0; new_bc->vl[i].shared = 0; } @@ -10836,7 +10818,7 @@ int set_buffer_control(struct hfi1_pportdata *ppd, } wait_for_vl_status_clear(dd, use_all_mask ? 
all_mask : changing_mask, - "shared"); + "shared"); if (change_count > 0) { for (i = 0; i < NUM_USABLE_VLS; i++) { @@ -10845,7 +10827,8 @@ int set_buffer_control(struct hfi1_pportdata *ppd, if (lowering_dedicated[i]) { set_vl_dedicated(dd, i, - be16_to_cpu(new_bc->vl[i].dedicated)); + be16_to_cpu(new_bc-> + vl[i].dedicated)); cur_bc.vl[i].dedicated = new_bc->vl[i].dedicated; } @@ -10861,7 +10844,8 @@ int set_buffer_control(struct hfi1_pportdata *ppd, if (be16_to_cpu(new_bc->vl[i].dedicated) > be16_to_cpu(cur_bc.vl[i].dedicated)) set_vl_dedicated(dd, i, - be16_to_cpu(new_bc->vl[i].dedicated)); + be16_to_cpu(new_bc-> + vl[i].dedicated)); } } @@ -10877,9 +10861,9 @@ int set_buffer_control(struct hfi1_pportdata *ppd, /* finally raise the global shared */ if (be16_to_cpu(new_bc->overall_shared_limit) > - be16_to_cpu(cur_bc.overall_shared_limit)) + be16_to_cpu(cur_bc.overall_shared_limit)) set_global_shared(dd, - be16_to_cpu(new_bc->overall_shared_limit)); + be16_to_cpu(new_bc->overall_shared_limit)); /* bracket the credit change with a total adjustment */ if (new_total < cur_total) @@ -11152,7 +11136,8 @@ static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts) * been verified to be in range */ write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT, - (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT); + (u64)timeout << + RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT); } void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd, @@ -11370,14 +11355,14 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS); if (reg != 0) { dd_dev_info(dd, "ctxt %d status %lld (blocked)\n", - ctxt, reg); + ctxt, reg); read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD); write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10); write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00); read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD); reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS); dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n", - ctxt, reg, reg == 0 ? "not" : "still"); + ctxt, reg, reg == 0 ? 
"not" : "still"); } } @@ -11388,7 +11373,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) */ /* set interrupt timeout */ write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT, - (u64)rcd->rcvavail_timeout << + (u64)rcd->rcvavail_timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT); /* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */ @@ -11863,8 +11848,7 @@ static int init_cntrs(struct hfi1_devdata *dd) dev_cntrs[i].offset = dd->ndevcntrs; for (j = 0; j < C_VL_COUNT; j++) { snprintf(name, C_MAX_NAME, "%s%d", - dev_cntrs[i].name, - vl_from_idx(j)); + dev_cntrs[i].name, vl_from_idx(j)); sz += strlen(name); /* Add ",32" for 32-bit counters */ if (dev_cntrs[i].flags & CNTR_32BIT) @@ -11987,8 +11971,7 @@ static int init_cntrs(struct hfi1_devdata *dd) port_cntrs[i].offset = dd->nportcntrs; for (j = 0; j < C_VL_COUNT; j++) { snprintf(name, C_MAX_NAME, "%s%d", - port_cntrs[i].name, - vl_from_idx(j)); + port_cntrs[i].name, vl_from_idx(j)); sz += strlen(name); /* Add ",32" for 32-bit counters */ if (port_cntrs[i].flags & CNTR_32BIT) @@ -12021,8 +12004,7 @@ static int init_cntrs(struct hfi1_devdata *dd) if (port_cntrs[i].flags & CNTR_VL) { for (j = 0; j < C_VL_COUNT; j++) { snprintf(name, C_MAX_NAME, "%s%d", - port_cntrs[i].name, - vl_from_idx(j)); + port_cntrs[i].name, vl_from_idx(j)); memcpy(p, name, strlen(name)); p += strlen(name); @@ -12077,8 +12059,8 @@ static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) switch (chip_lstate) { default: dd_dev_err(dd, - "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", - chip_lstate); + "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", + chip_lstate); /* fall through */ case LSTATE_DOWN: return IB_PORT_DOWN; @@ -12097,7 +12079,7 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) switch (chip_pstate & 0xf0) { default: dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", - chip_pstate); + chip_pstate); /* fall through */ case PLS_DISABLED: return IB_PORTPHYSSTATE_DISABLED; @@ -12163,7 +12145,7 @@ u32 get_logical_state(struct hfi1_pportdata *ppd) new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd)); if (new_state != ppd->lstate) { dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n", - opa_lstate_name(new_state), new_state); + opa_lstate_name(new_state), new_state); ppd->lstate = new_state; } /* @@ -12229,9 +12211,9 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) ib_pstate = chip_to_opa_pstate(ppd->dd, pstate); if (ppd->last_pstate != ib_pstate) { dd_dev_info(ppd->dd, - "%s: physical state changed to %s (0x%x), phy 0x%x\n", - __func__, opa_pstate_name(ib_pstate), ib_pstate, - pstate); + "%s: physical state changed to %s (0x%x), phy 0x%x\n", + __func__, opa_pstate_name(ib_pstate), ib_pstate, + pstate); ppd->last_pstate = ib_pstate; } return ib_pstate; @@ -12449,11 +12431,11 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd, * SDMAIdle */ remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine, - msix_intr); + msix_intr); remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine, - msix_intr); + msix_intr); remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine, - msix_intr); + msix_intr); } static int request_intx_irq(struct hfi1_devdata *dd) @@ -12463,10 +12445,10 @@ static int request_intx_irq(struct hfi1_devdata *dd) snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d", dd->unit); ret = request_irq(dd->pcidev->irq, general_interrupt, - IRQF_SHARED, dd->intx_name, dd); + IRQF_SHARED, dd->intx_name, dd); if 
(ret) dd_dev_err(dd, "unable to request INTx interrupt, err %d\n", - ret); + ret); else dd->requested_intx_irq = 1; return ret; @@ -12549,7 +12531,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) * ignore it */ dd_dev_err(dd, - "Unexpected extra MSI-X interrupt %d\n", i); + "Unexpected extra MSI-X interrupt %d\n", i); continue; } /* no argument, no interrupt */ @@ -12559,11 +12541,11 @@ static int request_msix_irqs(struct hfi1_devdata *dd) me->name[sizeof(me->name) - 1] = 0; ret = request_threaded_irq(me->msix.vector, handler, thread, 0, - me->name, arg); + me->name, arg); if (ret) { dd_dev_err(dd, - "unable to allocate %s interrupt, vector %d, index %d, err %d\n", - err_info, me->msix.vector, idx, ret); + "unable to allocate %s interrupt, vector %d, index %d, err %d\n", + err_info, me->msix.vector, idx, ret); return ret; } /* @@ -12748,11 +12730,11 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd->num_user_contexts = num_user_contexts; dd->freectxts = num_user_contexts; dd_dev_info(dd, - "rcv contexts: chip %d, used %d (kernel %d, user %d)\n", - (int)dd->chip_rcv_contexts, - (int)dd->num_rcv_contexts, - (int)dd->n_krcv_queues, - (int)dd->num_rcv_contexts - dd->n_krcv_queues); + "rcv contexts: chip %d, used %d (kernel %d, user %d)\n", + (int)dd->chip_rcv_contexts, + (int)dd->num_rcv_contexts, + (int)dd->n_krcv_queues, + (int)dd->num_rcv_contexts - dd->n_krcv_queues); /* * Receive array allocation: @@ -12778,8 +12760,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) / dd->rcv_entries.group_size; dd_dev_info(dd, - "RcvArray group count too high, change to %u\n", - dd->rcv_entries.ngroups); + "RcvArray group count too high, change to %u\n", + dd->rcv_entries.ngroups); dd->rcv_entries.nctxt_extra = 0; } /* @@ -12871,7 +12853,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* RcvArray */ for (i = 0; i < dd->chip_rcv_array_count; i++) write_csr(dd, RCV_ARRAY + (8 * i), - RCV_ARRAY_RT_WRITE_ENABLE_SMASK); + RCV_ARRAY_RT_WRITE_ENABLE_SMASK); /* RcvQPMapTable */ for (i = 0; i < 32; i++) @@ -12903,8 +12885,8 @@ static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits, return; if (time_after(jiffies, timeout)) { dd_dev_err(dd, - "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n", - status_bits, reg & status_bits); + "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n", + status_bits, reg & status_bits); return; } udelay(1); @@ -12936,7 +12918,7 @@ static void reset_cce_csrs(struct hfi1_devdata *dd) for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) { write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0); write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i), - CCE_MSIX_TABLE_UPPER_RESETCSR); + CCE_MSIX_TABLE_UPPER_RESETCSR); } for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) { /* CCE_MSIX_PBA read-only */ @@ -13120,8 +13102,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++) write_csr(dd, SEND_COUNTER_ARRAY64 + (8 * i), 0); write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR); - write_csr(dd, SEND_CM_GLOBAL_CREDIT, - SEND_CM_GLOBAL_CREDIT_RESETCSR); + write_csr(dd, SEND_CM_GLOBAL_CREDIT, SEND_CM_GLOBAL_CREDIT_RESETCSR); /* SEND_CM_CREDIT_USED_STATUS read-only */ write_csr(dd, SEND_CM_TIMER_CTRL, 0); write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0); @@ -13215,8 +13196,8 @@ static void init_rbufs(struct hfi1_devdata *dd) */ if (count++ > 500) { dd_dev_err(dd, - "%s: in-progress DMA not clearing: RcvStatus 
0x%llx, continuing\n", - __func__, reg); + "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n", + __func__, reg); break; } udelay(2); /* do not busy-wait the CSR */ @@ -13245,8 +13226,8 @@ static void init_rbufs(struct hfi1_devdata *dd) /* give up after 100us - slowest possible at 33MHz is 73us */ if (count++ > 50) { dd_dev_err(dd, - "%s: RcvStatus.RxRbufInit not set, continuing\n", - __func__); + "%s: RcvStatus.RxRbufInit not set, continuing\n", + __func__); break; } } @@ -13272,7 +13253,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) write_csr(dd, RCV_VL15, 0); /* this is a clear-down */ write_csr(dd, RCV_ERR_INFO, - RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK); + RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK); /* RCV_ERR_STATUS read-only */ write_csr(dd, RCV_ERR_MASK, 0); write_csr(dd, RCV_ERR_CLEAR, ~0ull); @@ -13318,8 +13299,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0); /* RCV_EGR_OFFSET_TAIL read-only */ for (j = 0; j < RXE_NUM_TID_FLOWS; j++) { - write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j), - 0); + write_uctxt_csr(dd, i, + RCV_TID_FLOW_TABLE + (8 * j), 0); } } } @@ -13519,12 +13500,12 @@ static void init_kdeth_qp(struct hfi1_devdata *dd) kdeth_qp = DEFAULT_KDETH_QP; write_csr(dd, SEND_BTH_QP, - (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) - << SEND_BTH_QP_KDETH_QP_SHIFT); + (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) << + SEND_BTH_QP_KDETH_QP_SHIFT); write_csr(dd, RCV_BTH_QP, - (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) - << RCV_BTH_QP_KDETH_QP_SHIFT); + (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) << + RCV_BTH_QP_KDETH_QP_SHIFT); } /** @@ -13649,22 +13630,21 @@ static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt) write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]); /* add rule0 */ write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */, - RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK - << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT | - 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT); + RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK << + RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT | + 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT); write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */, - LRH_BTH_MATCH_OFFSET - << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT | - LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT | - LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT | - ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT | - QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT | - ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT); + LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT | + LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT | + LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT | + ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT | + QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT | + ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT); write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */, - LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT | - LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT | - LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT | - LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT); + LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT | + LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT | + LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT | + LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT); /* Enable RSM */ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); kfree(rsmmap); @@ -13682,9 +13662,8 @@ static void init_rxe(struct hfi1_devdata *dd) /* enable all receive errors */ write_csr(dd, RCV_ERR_MASK, ~0ull); /* setup QPN map table - start where VL15 context leaves off 
*/ - init_qos( - dd, - dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0); + init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? + MIN_KERNEL_KCTXTS : 0); /* * make sure RcvCtrl.RcvWcb <= PCIe Device Control * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config @@ -13721,35 +13700,33 @@ static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu, u32 csr0to3, u32 csr4to7) { write_csr(dd, csr0to3, - 0ull << - SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT - | 1ull << - SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT - | 2ull * cu << - SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT - | 4ull * cu << - SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT); + 0ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT | + 1ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT | + 2ull * cu << + SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT | + 4ull * cu << + SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT); write_csr(dd, csr4to7, - 8ull * cu << - SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT - | 16ull * cu << - SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT - | 32ull * cu << - SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT - | 64ull * cu << - SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT); + 8ull * cu << + SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT | + 16ull * cu << + SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT | + 32ull * cu << + SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT | + 64ull * cu << + SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT); } static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu) { assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3, - SEND_CM_LOCAL_AU_TABLE4_TO7); + SEND_CM_LOCAL_AU_TABLE4_TO7); } void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu) { assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3, - SEND_CM_REMOTE_AU_TABLE4_TO7); + SEND_CM_REMOTE_AU_TABLE4_TO7); } static void init_txe(struct hfi1_devdata *dd) @@ -13995,8 +13972,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, "Functional simulator" }; - dd = hfi1_alloc_devdata(pdev, - NUM_IB_PORTS * sizeof(struct hfi1_pportdata)); + dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * + sizeof(struct hfi1_pportdata)); if (IS_ERR(dd)) goto bail; ppd = dd->pport; @@ -14083,8 +14060,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT; dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT; dd_dev_info(dd, "Implementation: %s, revision 0x%x\n", - dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown", - (int)dd->irev); + dd->icode < ARRAY_SIZE(inames) ? 
+ inames[dd->icode] : "unknown", (int)dd->irev); /* speeds the hardware can support */ dd->pport->link_speed_supported = OPA_LINK_SPEED_25G; diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index e02c527..0b0fd8a 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -150,8 +150,8 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) if (!n_packets && !n_bytes) return SEQ_SKIP; seq_printf(s, "%02llx %llu/%llu\n", i, - (unsigned long long)n_packets, - (unsigned long long)n_bytes); + (unsigned long long)n_packets, + (unsigned long long)n_bytes); return 0; } @@ -246,7 +246,7 @@ __acquires(RCU) } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, - loff_t *pos) + loff_t *pos) { struct qp_iter *iter = iter_ptr; @@ -392,7 +392,7 @@ static ssize_t portnames_read(struct file *file, char __user *buf, /* read the per-port counters */ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { u64 *counters; size_t avail; @@ -413,7 +413,7 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf, * read the per-port QSFP data for ppd */ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { struct hfi1_pportdata *ppd; char *tmp; @@ -437,7 +437,7 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf, /* Do an i2c write operation on the chain for the given HFI. */ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, u32 target) + size_t count, loff_t *ppos, u32 target) { struct hfi1_pportdata *ppd; char *buff; @@ -484,21 +484,21 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, /* Do an i2c write operation on chain for HFI 0. */ static ssize_t i2c1_debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { return __i2c_debugfs_write(file, buf, count, ppos, 0); } /* Do an i2c write operation on chain for HFI 1. */ static ssize_t i2c2_debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { return __i2c_debugfs_write(file, buf, count, ppos, 1); } /* Do an i2c read operation on the chain for the given HFI. */ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos, u32 target) + size_t count, loff_t *ppos, u32 target) { struct hfi1_pportdata *ppd; char *buff; @@ -545,21 +545,21 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, /* Do an i2c read operation on chain for HFI 0. */ static ssize_t i2c1_debugfs_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { return __i2c_debugfs_read(file, buf, count, ppos, 0); } /* Do an i2c read operation on chain for HFI 1. */ static ssize_t i2c2_debugfs_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { return __i2c_debugfs_read(file, buf, count, ppos, 1); } /* Do a QSFP write operation on the i2c chain for the given HFI. 
*/ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, u32 target) + size_t count, loff_t *ppos, u32 target) { struct hfi1_pportdata *ppd; char *buff; @@ -605,21 +605,21 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, /* Do a QSFP write operation on i2c chain for HFI 0. */ static ssize_t qsfp1_debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { return __qsfp_debugfs_write(file, buf, count, ppos, 0); } /* Do a QSFP write operation on i2c chain for HFI 1. */ static ssize_t qsfp2_debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { return __qsfp_debugfs_write(file, buf, count, ppos, 1); } /* Do a QSFP read operation on the i2c chain for the given HFI. */ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos, u32 target) + size_t count, loff_t *ppos, u32 target) { struct hfi1_pportdata *ppd; char *buff; @@ -665,14 +665,14 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, /* Do a QSFP read operation on i2c chain for HFI 0. */ static ssize_t qsfp1_debugfs_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { return __qsfp_debugfs_read(file, buf, count, ppos, 0); } /* Do a QSFP read operation on i2c chain for HFI 1. */ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { return __qsfp_debugfs_read(file, buf, count, ppos, 1); } diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 3ef297e..50a3b5a 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -438,7 +438,7 @@ drop: } static inline void init_packet(struct hfi1_ctxtdata *rcd, - struct hfi1_packet *packet) + struct hfi1_packet *packet) { packet->rsize = rcd->rcvhdrqentsize; /* words */ packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */ @@ -700,8 +700,9 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) * The +2 is the size of the RHF. 
*/ prefetch_range(packet->ebuf, - packet->tlen - ((packet->rcd->rcvhdrqentsize - - (rhf_hdrq_offset(packet->rhf) + 2)) * 4)); + packet->tlen - ((packet->rcd->rcvhdrqentsize - + (rhf_hdrq_offset(packet->rhf) + + 2)) * 4)); } /* @@ -958,9 +959,9 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) prescan_rxq(rcd, &packet); while (last == RCV_PKT_OK) { - - if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet, - DROP_PACKET_OFF) == DROP_PACKET_ON)) { + if (unlikely(dd->do_drop && + atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) == + DROP_PACKET_ON)) { dd->do_drop = 0; /* On to the next packet */ @@ -990,8 +991,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) if (seq != rcd->seq_cnt) last = RCV_PKT_DONE; if (needset) { - dd_dev_info(dd, - "Switching to NO_DMA_RTAIL\n"); + dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n"); set_all_nodma_rtail(dd); needset = 0; } @@ -1234,7 +1234,7 @@ void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, if (atomic_inc_return(&ppd->led_override_timer_active) == 1) { /* Need to start timer */ setup_timer(&ppd->led_override_timer, run_led_override, - (unsigned long)ppd); + (unsigned long)ppd); ppd->led_override_timer.expires = jiffies + 1; add_timer(&ppd->led_override_timer); @@ -1271,8 +1271,8 @@ int hfi1_reset_device(int unit) if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) { dd_dev_info(dd, - "Invalid unit number %u or not initialized or not present\n", - unit); + "Invalid unit number %u or not initialized or not present\n", + unit); ret = -ENXIO; goto bail; } @@ -1302,11 +1302,11 @@ int hfi1_reset_device(int unit) if (ret) dd_dev_err(dd, - "Reinitialize unit %u after reset failed with %d\n", - unit, ret); + "Reinitialize unit %u after reset failed with %d\n", + unit, ret); else dd_dev_info(dd, "Reinitialized unit %u after resetting\n", - unit); + unit); bail: return ret; @@ -1363,7 +1363,7 @@ int process_receive_bypass(struct hfi1_packet *packet) handle_eflags(packet); dd_dev_err(packet->rcd->dd, - "Bypass packets are not supported in normal operation. Dropping\n"); + "Bypass packets are not supported in normal operation. Dropping\n"); return RHF_RCV_CONTINUE; } @@ -1401,6 +1401,6 @@ int kdeth_process_eager(struct hfi1_packet *packet) int process_receive_invalid(struct hfi1_packet *packet) { dd_dev_err(packet->rcd->dd, "Invalid packet type %d. 
Dropping\n", - rhf_rcv_type(packet->rhf)); + rhf_rcv_type(packet->rhf)); return RHF_RCV_CONTINUE; } diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c index d7250af..f36d06b 100644 --- a/drivers/staging/rdma/hfi1/eprom.c +++ b/drivers/staging/rdma/hfi1/eprom.c @@ -115,11 +115,9 @@ static DEFINE_MUTEX(eprom_mutex); static void write_enable(struct hfi1_devdata *dd) { /* raise signal */ - write_csr(dd, ASIC_GPIO_OUT, - read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N); + write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N); /* raise enable */ - write_csr(dd, ASIC_GPIO_OE, - read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N); + write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N); } /* @@ -128,11 +126,9 @@ static void write_enable(struct hfi1_devdata *dd) static void write_disable(struct hfi1_devdata *dd) { /* lower signal */ - write_csr(dd, ASIC_GPIO_OUT, - read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N); + write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N); /* lower enable */ - write_csr(dd, ASIC_GPIO_OE, - read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N); + write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N); } /* @@ -210,8 +206,8 @@ static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len) /* check the end points for the minimum erase */ if ((start & MASK_4KB) || (end & MASK_4KB)) { dd_dev_err(dd, - "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n", - __func__, start, end); + "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n", + __func__, start, end); return -EINVAL; } @@ -275,7 +271,7 @@ static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { read_page(dd, start + offset, buffer); if (copy_to_user((void __user *)(addr + offset), - buffer, EP_PAGE_SIZE)) { + buffer, EP_PAGE_SIZE)) { ret = -EFAULT; goto done; } @@ -319,7 +315,7 @@ static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { if (copy_from_user(buffer, (void __user *)(addr + offset), - EP_PAGE_SIZE)) { + EP_PAGE_SIZE)) { ret = -EFAULT; goto done; } @@ -385,13 +381,13 @@ int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) ret = acquire_hw_mutex(dd); if (ret) { dd_dev_err(dd, - "%s: unable to acquire hw mutex, no EPROM support\n", - __func__); + "%s: unable to acquire hw mutex, no EPROM support\n", + __func__); goto done_asic; } dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n", - __func__, cmd->type, cmd->len, cmd->addr); + __func__, cmd->type, cmd->len, cmd->addr); switch (cmd->type) { case HFI1_CMD_EP_INFO: @@ -402,7 +398,7 @@ int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) dev_id = read_device_id(dd); /* addr points to a u32 user buffer */ if (copy_to_user((void __user *)cmd->addr, &dev_id, - sizeof(u32))) + sizeof(u32))) ret = -EFAULT; break; @@ -430,7 +426,7 @@ int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) default: dd_dev_err(dd, "%s: unexpected command %d\n", - __func__, cmd->type); + __func__, cmd->type); ret = -EINVAL; break; } @@ -464,19 +460,18 @@ int eprom_init(struct hfi1_devdata *dd) ret = acquire_hw_mutex(dd); if (ret) { dd_dev_err(dd, - "%s: unable to acquire hw mutex, no EPROM support\n", - __func__); + "%s: unable to acquire hw mutex, no EPROM support\n", + __func__); goto done_asic; } /* reset EPROM to be sure it is in a good state */ /* set reset */ - write_csr(dd, ASIC_EEP_CTL_STAT, - 
ASIC_EEP_CTL_STAT_EP_RESET_SMASK); + write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK); /* clear reset, set speed */ write_csr(dd, ASIC_EEP_CTL_STAT, - EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT); + EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT); /* wake the device with command "release powerdown NoID" */ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index c4b9dd4..1bd1545 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1145,9 +1145,9 @@ static int user_init(struct file *fp) * has done it. */ if (fd->subctxt) { - ret = wait_event_interruptible(uctxt->wait, - !test_bit(HFI1_CTXT_MASTER_UNINIT, - &uctxt->event_flags)); + ret = wait_event_interruptible(uctxt->wait, !test_bit( + HFI1_CTXT_MASTER_UNINIT, + &uctxt->event_flags)); goto expected; } @@ -1592,7 +1592,7 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence) /* NOTE: assumes unsigned long is 8 bytes */ static ssize_t ui_read(struct file *filp, char __user *buf, size_t count, - loff_t *f_pos) + loff_t *f_pos) { struct hfi1_devdata *dd = filp->private_data; void __iomem *base = dd->kregbase; diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 3a7163d..52a3e8c 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -382,8 +382,8 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what, return 0; dd_dev_err(dd, - "invalid firmware header field %s: expected 0x%x, actual 0x%x\n", - what, expected, actual); + "invalid firmware header field %s: expected 0x%x, actual 0x%x\n", + what, expected, actual); return 1; } @@ -393,13 +393,19 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what, static int verify_css_header(struct hfi1_devdata *dd, struct css_header *css) { /* verify CSS header fields (most sizes are in DW, so add /4) */ - if (invalid_header(dd, "module_type", css->module_type, CSS_MODULE_TYPE) || - invalid_header(dd, "header_len", css->header_len, (sizeof(struct firmware_file) / 4)) || - invalid_header(dd, "header_version", css->header_version, CSS_HEADER_VERSION) || - invalid_header(dd, "module_vendor", css->module_vendor, CSS_MODULE_VENDOR) || + if (invalid_header(dd, "module_type", css->module_type, + CSS_MODULE_TYPE) || + invalid_header(dd, "header_len", css->header_len, + (sizeof(struct firmware_file) / 4)) || + invalid_header(dd, "header_version", css->header_version, + CSS_HEADER_VERSION) || + invalid_header(dd, "module_vendor", css->module_vendor, + CSS_MODULE_VENDOR) || invalid_header(dd, "key_size", css->key_size, KEY_SIZE / 4) || - invalid_header(dd, "modulus_size", css->modulus_size, KEY_SIZE / 4) || - invalid_header(dd, "exponent_size", css->exponent_size, EXPONENT_SIZE / 4)) { + invalid_header(dd, "modulus_size", css->modulus_size, + KEY_SIZE / 4) || + invalid_header(dd, "exponent_size", css->exponent_size, + EXPONENT_SIZE / 4)) { return -EINVAL; } return 0; @@ -414,8 +420,8 @@ static int payload_check(struct hfi1_devdata *dd, const char *name, /* make sure we have some payload */ if (prefix_size >= file_size) { dd_dev_err(dd, - "firmware \"%s\", size %ld, must be larger than %ld bytes\n", - name, file_size, prefix_size); + "firmware \"%s\", size %ld, must be larger than %ld bytes\n", + name, file_size, prefix_size); return -EINVAL; } @@ -491,7 +497,7 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, 
const char *name, /* make sure there are bytes in the payload */ ret = payload_check(dd, name, fdet->fw->size, - sizeof(struct firmware_file)); + sizeof(struct firmware_file)); if (ret == 0) { fdet->css_header = css; fdet->modulus = ff->modulus; @@ -516,7 +522,7 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name, /* make sure there are bytes in the payload */ ret = payload_check(dd, name, fdet->fw->size, - sizeof(struct augmented_firmware_file)); + sizeof(struct augmented_firmware_file)); if (ret == 0) { fdet->css_header = css; fdet->modulus = aff->modulus; @@ -531,9 +537,10 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name, } else { /* css->size check failed */ dd_dev_err(dd, - "invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n", - fdet->fw->size / 4, (fdet->fw->size - AUGMENT_SIZE) / 4, - css->size); + "invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n", + fdet->fw->size / 4, + (fdet->fw->size - AUGMENT_SIZE) / 4, + css->size); ret = -EINVAL; } @@ -696,7 +703,7 @@ static int obtain_firmware(struct hfi1_devdata *dd) if (platform_config_load) { platform_config = NULL; err = request_firmware(&platform_config, platform_config_name, - &dd->pcidev->dev); + &dd->pcidev->dev); if (err) { platform_config = NULL; goto done; @@ -837,7 +844,7 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who, >> MISC_CFG_FW_CTRL_RSA_STATUS_SHIFT; if (status != RSA_STATUS_IDLE) { dd_dev_err(dd, "%s security engine not idle - giving up\n", - who); + who); return -EBUSY; } @@ -874,7 +881,7 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who, if (status == RSA_STATUS_IDLE) { /* should not happen */ dd_dev_err(dd, "%s firmware security bad idle state\n", - who); + who); ret = -EINVAL; break; } else if (status == RSA_STATUS_DONE) { @@ -908,8 +915,8 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who, * is not keeping the error high. */ write_csr(dd, MISC_ERR_CLEAR, - MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK - | MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK); + MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK | + MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK); /* * All that is left are the current errors. Print warnings on * authorization failure details, if any. Firmware authorization @@ -938,7 +945,8 @@ static void load_security_variables(struct hfi1_devdata *dd, write_rsa_data(dd, MISC_CFG_RSA_MU, fdet->mu, MU_SIZE); /* Security variables d. 
Write the header */ write_streamed_rsa_data(dd, MISC_CFG_SHA_PRELOAD, - (u8 *)fdet->css_header, sizeof(struct css_header)); + (u8 *)fdet->css_header, + sizeof(struct css_header)); } /* return the 8051 firmware state */ @@ -1018,7 +1026,7 @@ static int load_8051_firmware(struct hfi1_devdata *dd, /* Firmware load steps 3-5 */ ret = write_8051(dd, 1/*code*/, 0, fdet->firmware_ptr, - fdet->firmware_len); + fdet->firmware_len); if (ret) return ret; @@ -1045,13 +1053,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd, ret = wait_fm_ready(dd, TIMEOUT_8051_START); if (ret) { /* timed out */ dd_dev_err(dd, "8051 start timeout, current state 0x%x\n", - get_firmware_state(dd)); + get_firmware_state(dd)); return -ETIMEDOUT; } read_misc_status(dd, &ver_a, &ver_b); dd_dev_info(dd, "8051 firmware version %d.%d\n", - (int)ver_b, (int)ver_a); + (int)ver_b, (int)ver_a); dd->dc8051_ver = dc8051_ver(ver_b, ver_a); return 0; @@ -1066,11 +1074,11 @@ void sbus_request(struct hfi1_devdata *dd, u8 receiver_addr, u8 data_addr, u8 command, u32 data_in) { write_csr(dd, ASIC_CFG_SBUS_REQUEST, - ((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT) - | ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT) - | ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT) - | ((u64)receiver_addr - << ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT)); + ((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT) | + ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT) | + ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT) | + ((u64)receiver_addr << + ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT)); } /* @@ -1088,14 +1096,14 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags) return; dd_dev_info(dd, "Turning off spicos:%s%s\n", - flags & SPICO_SBUS ? " SBus" : "", - flags & SPICO_FABRIC ? " fabric" : ""); + flags & SPICO_SBUS ? " SBus" : "", + flags & SPICO_FABRIC ? 
" fabric" : ""); write_csr(dd, MISC_CFG_FW_CTRL, ENABLE_SPICO_SMASK); /* disable SBus spico */ if (flags & SPICO_SBUS) sbus_request(dd, SBUS_MASTER_BROADCAST, 0x01, - WRITE_SBUS_RECEIVER, 0x00000040); + WRITE_SBUS_RECEIVER, 0x00000040); /* disable the fabric serdes spicos */ if (flags & SPICO_FABRIC) @@ -1222,7 +1230,7 @@ static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, /* step 5: download SerDes machine code */ for (i = 0; i < fdet->firmware_len; i += 4) { sbus_request(dd, ra, 0x0a, WRITE_SBUS_RECEIVER, - *(u32 *)&fdet->firmware_ptr[i]); + *(u32 *)&fdet->firmware_ptr[i]); } /* step 6: IMEM override off */ sbus_request(dd, ra, 0x00, WRITE_SBUS_RECEIVER, 0x00000000); @@ -1261,7 +1269,7 @@ static int load_sbus_firmware(struct hfi1_devdata *dd, /* step 5: download the SBus Master machine code */ for (i = 0; i < fdet->firmware_len; i += 4) { sbus_request(dd, ra, 0x14, WRITE_SBUS_RECEIVER, - *(u32 *)&fdet->firmware_ptr[i]); + *(u32 *)&fdet->firmware_ptr[i]); } /* step 6: set IMEM_CNTL_EN off */ sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000040); @@ -1300,7 +1308,7 @@ static int load_pcie_serdes_firmware(struct hfi1_devdata *dd, */ for (i = 0; i < fdet->firmware_len; i += 4) { sbus_request(dd, ra, 0x04, WRITE_SBUS_RECEIVER, - *(u32 *)&fdet->firmware_ptr[i]); + *(u32 *)&fdet->firmware_ptr[i]); } /* step 5: disable XDMEM access */ sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000140); @@ -1334,7 +1342,7 @@ static void set_serdes_broadcast(struct hfi1_devdata *dd, u8 bg1, u8 bg2, * 23:16 BROADCAST_GROUP_2 (default 0xff) */ sbus_request(dd, addrs[count], 0xfd, WRITE_SBUS_RECEIVER, - (u32)bg1 << 4 | (u32)bg2 << 16); + (u32)bg1 << 4 | (u32)bg2 << 16); } } @@ -1359,8 +1367,8 @@ retry: /* timed out */ dd_dev_err(dd, - "Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n", - (u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up"); + "Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n", + (u32)user, (u32)mask, (try == 0) ? 
"retrying" : "giving up"); if (try == 0) { /* break mutex and retry */ @@ -1380,7 +1388,7 @@ void release_hw_mutex(struct hfi1_devdata *dd) void set_sbus_fast_mode(struct hfi1_devdata *dd) { write_csr(dd, ASIC_CFG_SBUS_EXECUTE, - ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK); + ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK); } void clear_sbus_fast_mode(struct hfi1_devdata *dd) @@ -1410,9 +1418,9 @@ int load_firmware(struct hfi1_devdata *dd) set_sbus_fast_mode(dd); set_serdes_broadcast(dd, all_fabric_serdes_broadcast, - fabric_serdes_broadcast[dd->hfi1_id], - fabric_serdes_addrs[dd->hfi1_id], - NUM_FABRIC_SERDES); + fabric_serdes_broadcast[dd->hfi1_id], + fabric_serdes_addrs[dd->hfi1_id], + NUM_FABRIC_SERDES); turn_off_spicos(dd, SPICO_FABRIC); do { ret = load_fabric_serdes_firmware(dd, &fw_fabric); @@ -1551,8 +1559,8 @@ int parse_platform_config(struct hfi1_devdata *dd) header2 = *(ptr + 1); if (header1 != ~header2) { dd_dev_info(dd, "%s: Failed validation at offset %ld\n", - __func__, (ptr - - (u32 *)dd->platform_config.data)); + __func__, (ptr - (u32 *) + dd->platform_config.data)); goto bail; } @@ -1595,9 +1603,10 @@ int parse_platform_config(struct hfi1_devdata *dd) break; default: dd_dev_info(dd, - "%s: Unknown data table %d, offset %ld\n", - __func__, table_type, - (ptr - (u32 *)dd->platform_config.data)); + "%s: Unknown data table %d, offset %ld\n", + __func__, table_type, + (ptr - (u32 *) + dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table = ptr; @@ -1630,7 +1639,7 @@ int parse_platform_config(struct hfi1_devdata *dd) /* Calculate and check table crc */ crc = crc32_le(~(u32)0, (unsigned char const *)ptr, - (table_length_dwords * 4)); + (table_length_dwords * 4)); crc ^= ~(u32)0; /* Jump the table */ @@ -1654,7 +1663,8 @@ bail: } static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, - int field, u32 *field_len_bits, u32 *field_start_bits) + int field, u32 *field_len_bits, + u32 *field_start_bits) { struct platform_config_cache *pcfgcache = &dd->pcfg_cache; u32 *src_ptr = NULL; @@ -1714,8 +1724,9 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, * @len: length of memory pointed by @data in bytes. 
*/ int get_platform_config_field(struct hfi1_devdata *dd, - enum platform_config_table_type_encoding table_type, - int table_index, int field_index, u32 *data, u32 len) + enum platform_config_table_type_encoding + table_type, int table_index, int field_index, + u32 *data, u32 len) { int ret = 0, wlen = 0, seek = 0; u32 field_len_bits = 0, field_start_bits = 0, *src_ptr = NULL; @@ -1727,7 +1738,8 @@ int get_platform_config_field(struct hfi1_devdata *dd, return -EINVAL; ret = get_platform_fw_field_metadata(dd, table_type, field_index, - &field_len_bits, &field_start_bits); + &field_len_bits, + &field_start_bits); if (ret) return -EINVAL; @@ -1817,9 +1829,9 @@ int load_pcie_firmware(struct hfi1_devdata *dd) if (fw_pcie_serdes_load) { dd_dev_info(dd, "Setting PCIe SerDes broadcast\n"); set_serdes_broadcast(dd, all_pcie_serdes_broadcast, - pcie_serdes_broadcast[dd->hfi1_id], - pcie_serdes_addrs[dd->hfi1_id], - NUM_PCIE_SERDES); + pcie_serdes_broadcast[dd->hfi1_id], + pcie_serdes_addrs[dd->hfi1_id], + NUM_PCIE_SERDES); do { ret = load_pcie_serdes_firmware(dd, &fw_pcie); } while (retry_firmware(dd, ret)); @@ -1844,5 +1856,5 @@ void read_guid(struct hfi1_devdata *dd) dd->base_guid = read_csr(dd, DC_DC8051_CFG_LOCAL_GUID); dd_dev_info(dd, "GUID %llx", - (unsigned long long)dd->base_guid); + (unsigned long long)dd->base_guid); } diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 4db5ad9..07df515 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1715,8 +1715,9 @@ void restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); int get_platform_config_field(struct hfi1_devdata *dd, - enum platform_config_table_type_encoding table_type, - int table_index, int field_index, u32 *data, u32 len); + enum platform_config_table_type_encoding + table_type, int table_index, int field_index, + u32 *data, u32 len); const char *get_unit_name(int unit); const char *get_card_name(struct rvt_dev_info *rdi); diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index f794604..a721059 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -149,7 +149,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) rcd = hfi1_create_ctxtdata(ppd, i, dd->node); if (!rcd) { dd_dev_err(dd, - "Unable to allocate kernel receive context, failing\n"); + "Unable to allocate kernel receive context, failing\n"); goto nomem; } /* @@ -170,7 +170,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); if (!rcd->sc) { dd_dev_err(dd, - "Unable to allocate kernel send context, failing\n"); + "Unable to allocate kernel send context, failing\n"); dd->rcd[rcd->ctxt] = NULL; hfi1_free_ctxtdata(dd, rcd); goto nomem; @@ -741,7 +741,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) lastfail = hfi1_setup_eagerbufs(rcd); if (lastfail) dd_dev_err(dd, - "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); + "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); } if (lastfail) ret = lastfail; @@ -797,8 +797,8 @@ done: lastfail = bringup_serdes(ppd); if (lastfail) dd_dev_info(dd, - "Failed to bring up port %u\n", - ppd->port); + "Failed to bring up port %u\n", + ppd->port); /* * Set status even if port serdes is not initialized @@ -1542,8 +1542,8 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrq) { 
dd_dev_err(dd, - "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", - amt, rcd->ctxt); + "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", + amt, rcd->ctxt); goto bail; } @@ -1587,8 +1587,8 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) bail_free: dd_dev_err(dd, - "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", - rcd->ctxt); + "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", + rcd->ctxt); vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, @@ -1678,7 +1678,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) if (rcd->egrbufs.rcvtid_size == round_mtu || !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) { dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n", - rcd->ctxt); + rcd->ctxt); goto bail_rcvegrbuf_phys; } @@ -1760,14 +1760,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER, - rcd->egrbufs.rcvtids[idx].phys, order); + rcd->egrbufs.rcvtids[idx].phys, order); cond_resched(); } goto bail; bail_rcvegrbuf_phys: for (idx = 0; idx < rcd->egrbufs.alloced && - rcd->egrbufs.buffers[idx].addr; + rcd->egrbufs.buffers[idx].addr; idx++) { dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[idx].len, diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c index 03cebae..46eeeca 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/staging/rdma/hfi1/intr.c @@ -135,18 +135,16 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) set_up_vl15(dd, dd->vau, dd->vl15_init); assign_remote_cm_au_table(dd, dd->vcu); ppd->neighbor_guid = - read_csr(dd, - DC_DC8051_STS_REMOTE_GUID); + read_csr(dd, DC_DC8051_STS_REMOTE_GUID); ppd->neighbor_type = read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & - DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; - dd_dev_info(dd, - "Neighbor GUID: %llx Neighbor type %d\n", - ppd->neighbor_guid, - ppd->neighbor_type); + DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; + dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n", + ppd->neighbor_guid, + ppd->neighbor_type); } /* physical link went up */ diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 13cf66f..7619b75 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -535,7 +535,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ibp = &ppd->ibport_data; if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) || - ppd->vls_supported > ARRAY_SIZE(dd->vld)) { + ppd->vls_supported > ARRAY_SIZE(dd->vld)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -981,9 +981,8 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, link_state = HLS_DN_DOWNDEF; else if (phys_state == IB_PORTPHYSSTATE_POLLING) { link_state = HLS_DN_POLL; - set_link_down_reason(ppd, - OPA_LINKDOWN_REASON_FM_BOUNCE, 0, - OPA_LINKDOWN_REASON_FM_BOUNCE); + set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE, + 0, OPA_LINKDOWN_REASON_FM_BOUNCE); } else if (phys_state == IB_PORTPHYSSTATE_DISABLED) link_state = HLS_DN_DISABLE; else { @@ -1102,7 +1101,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* Must be a valid unicast LID address. 
*/ if ((lid == 0 && ls_old > IB_PORT_INIT) || - lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n", lid); @@ -1135,7 +1134,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* Must be a valid unicast LID address. */ if ((smlid == 0 && ls_old > IB_PORT_INIT) || - smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid); } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) { @@ -1185,7 +1184,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, if (lwe == OPA_LINK_WIDTH_RESET || lwe == OPA_LINK_WIDTH_RESET_OLD) { set_link_width_downgrade_enabled(ppd, - ppd->link_width_downgrade_supported); + ppd-> + link_width_downgrade_supported + ); } else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) { /* only set and apply if something changed */ if (lwe != ppd->link_width_downgrade_enabled) { @@ -1210,16 +1211,17 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ibp->rvp.vl_high_limit); if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) || - ppd->vls_supported > ARRAY_SIZE(dd->vld)) { + ppd->vls_supported > ARRAY_SIZE(dd->vld)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } for (i = 0; i < ppd->vls_supported; i++) { if ((i % 2) == 0) - mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >> 4) - & 0xF); + mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >> + 4) & 0xF); else - mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] & 0xF); + mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] & + 0xF); if (mtu == 0xffff) { pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n", mtu, @@ -1229,8 +1231,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, } if (dd->vld[i].mtu != mtu) { dd_dev_info(dd, - "MTU change on vl %d from %d to %d\n", - i, dd->vld[i].mtu, mtu); + "MTU change on vl %d from %d to %d\n", + i, dd->vld[i].mtu, mtu); dd->vld[i].mtu = mtu; call_set_mtu++; } @@ -1243,8 +1245,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, mtu = 2048; if (dd->vld[15].mtu != mtu) { dd_dev_info(dd, - "MTU change on vl 15 from %d to %d\n", - dd->vld[15].mtu, mtu); + "MTU change on vl 15 from %d to %d\n", + dd->vld[15].mtu, mtu); dd->vld[15].mtu = mtu; call_set_mtu++; } @@ -1260,7 +1262,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, smp->status |= IB_SMP_INVALID_FIELD; } else { if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS, - vls) == -EINVAL) + vls) == -EINVAL) smp->status |= IB_SMP_INVALID_FIELD; } } @@ -1806,7 +1808,7 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, * addr and (addr + len - 1) are on the same "page" */ if (addr >= 4096 || - (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) { + (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -2271,7 +2273,7 @@ enum error_info_selects { }; static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp, - struct ib_device *ibdev, u32 *resp_len) + struct ib_device *ibdev, u32 *resp_len) { struct opa_class_port_info *p = (struct opa_class_port_info *)pmp->data; @@ -2320,7 +2322,8 @@ static void a0_portstatus(struct hfi1_pportdata *ppd, } static int 
pma_get_opa_portstatus(struct opa_pma_mad *pmp, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, + u8 port, u32 *resp_len) { struct opa_port_status_req *req = (struct opa_port_status_req *)pmp->data; @@ -2376,7 +2379,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, CNTR_INVALID_VL)); rsp->port_multicast_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS, - CNTR_INVALID_VL)); + CNTR_INVALID_VL)); rsp->port_multicast_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL)); @@ -2405,7 +2408,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, } tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, - CNTR_INVALID_VL); + CNTR_INVALID_VL); if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) { /* overflow/wrapped */ rsp->link_error_recovery = cpu_to_be32(~0); @@ -2420,7 +2423,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL)); rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN, - CNTR_INVALID_VL)); + CNTR_INVALID_VL)); /* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */ tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL); @@ -2442,27 +2445,27 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, rsp->vls[vfi].port_vl_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_xmit_data = cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_xmit_pkts = cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_xmit_wait = cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_rcv_fecn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_rcv_becn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); vlinfo++; vfi++; @@ -2492,7 +2495,7 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port, error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL); error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR, - CNTR_INVALID_VL); + CNTR_INVALID_VL); /* local link integrity must be right-shifted by the lli resolution */ tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); @@ -2502,10 +2505,10 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port, tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL); error_counter_summary += (tmp >> res_ler); error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR, - CNTR_INVALID_VL); + CNTR_INVALID_VL); error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL); error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR, - CNTR_INVALID_VL); + CNTR_INVALID_VL); /* ppd->link_downed is a 32-bit value */ error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL); @@ -2563,7 +2566,8 @@ static void pma_get_opa_port_dctrs(struct ib_device *ibdev, } static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, + u8 port, u32 *resp_len) { struct opa_port_data_counters_msg *req = (struct opa_port_data_counters_msg *)pmp->data; @@ -2650,35 +2654,35 @@ static int 
pma_get_opa_datacounters(struct opa_pma_mad *pmp, * any additional checks for vl. */ for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(req->vl_select_mask)) { + 8 * sizeof(req->vl_select_mask)) { memset(vlinfo, 0, sizeof(*vlinfo)); rsp->vls[vfi].port_vl_xmit_data = cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_xmit_pkts = cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_xmit_wait = cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_rcv_fecn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); rsp->vls[vfi].port_vl_rcv_becn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL, - idx_from_vl(vl))); + idx_from_vl(vl))); /* rsp->port_vl_xmit_time_cong is 0 for HFIs */ /* rsp->port_vl_xmit_wasted_bw ??? */ @@ -2777,7 +2781,8 @@ static void pma_get_opa_port_ectrs(struct ib_device *ibdev, } static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, + u8 port, u32 *resp_len) { size_t response_data_size; struct _port_ectrs *rsp; @@ -2820,7 +2825,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask)); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -2842,7 +2847,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, CNTR_INVALID_VL)); rsp->fm_config_errors = cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR, - CNTR_INVALID_VL)); + CNTR_INVALID_VL)); tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL); rsp->uncorrectable_errors = tmp < 0x100 ? 
(tmp & 0xff) : 0xff; @@ -2950,7 +2955,8 @@ bail: } static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, + u8 port, u32 *resp_len) { size_t response_data_size; struct _port_ei *rsp; @@ -3000,9 +3006,9 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, rsp->port_rcv_ei.status_and_code = dd->err_info_rcvport.status_and_code; memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1, - &dd->err_info_rcvport.packet_flit1, sizeof(u64)); + &dd->err_info_rcvport.packet_flit1, sizeof(u64)); memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2, - &dd->err_info_rcvport.packet_flit2, sizeof(u64)); + &dd->err_info_rcvport.packet_flit2, sizeof(u64)); /* ExcessiverBufferOverrunInfo */ reg = read_csr(dd, RCV_ERR_INFO); @@ -3047,7 +3053,8 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, } static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, + u8 port, u32 *resp_len) { struct opa_clear_port_status *req = (struct opa_clear_port_status *)pmp->data; @@ -3131,7 +3138,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, if (counter_select & CS_LINK_ERROR_RECOVERY) { write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0); write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, - CNTR_INVALID_VL, 0); + CNTR_INVALID_VL, 0); } if (counter_select & CS_PORT_RCV_ERRORS) @@ -3194,7 +3201,8 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, } static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, + u8 port, u32 *resp_len) { struct _port_ei *rsp; struct opa_port_error_info_msg *req; @@ -3295,9 +3303,8 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, - u8 *data, - struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 *data, struct ib_device *ibdev, + u8 port, u32 *resp_len) { int i; struct opa_congestion_setting_attr *p = @@ -3402,7 +3409,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, continue; memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3); memcpy(cong_log->events[i].remote_qp_number_cn_entry, - &cce->rqpn, 3); + &cce->rqpn, 3); cong_log->events[i].sl_svc_type_cn_entry = ((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7); cong_log->events[i].remote_lid_cn_entry = @@ -3584,8 +3591,8 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, reg = read_csr(dd, DCC_CFG_LED_CNTRL); if ((reg & DCC_CFG_LED_CNTRL_LED_CNTRL_SMASK) && - ((reg & DCC_CFG_LED_CNTRL_LED_SW_BLINK_RATE_SMASK) == 0xf)) - p->rsvd_led_mask = cpu_to_be32(OPA_LED_MASK); + ((reg & DCC_CFG_LED_CNTRL_LED_SW_BLINK_RATE_SMASK) == 0xf)) + p->rsvd_led_mask = cpu_to_be32(OPA_LED_MASK); if (resp_len) *resp_len += sizeof(struct opa_led_info); @@ -3653,7 +3660,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_get_opa_psi(smp, am, data, ibdev, port, @@ -3735,7 +3742,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_set_opa_psi(smp, 
am, data, ibdev, port, @@ -4092,10 +4099,10 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, port_num && port_num <= ibdev->phys_port_cnt && port != port_num) (void)check_mkey(to_iport(ibdev, port_num), - (struct ib_mad_hdr *)smp, 0, - smp->mkey, - (__force __be32)smp->dr_slid, - smp->return_path, smp->hop_cnt); + (struct ib_mad_hdr *)smp, 0, + smp->mkey, + (__force __be32)smp->dr_slid, + smp->return_path, smp->hop_cnt); ret = IB_MAD_RESULT_FAILURE; return ret; } @@ -4203,19 +4210,19 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port, break; case OPA_PM_ATTRIB_ID_PORT_STATUS: ret = pma_get_opa_portstatus(pmp, ibdev, port, - resp_len); + resp_len); break; case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS: ret = pma_get_opa_datacounters(pmp, ibdev, port, - resp_len); + resp_len); break; case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS: ret = pma_get_opa_porterrors(pmp, ibdev, port, - resp_len); + resp_len); break; case OPA_PM_ATTRIB_ID_ERROR_INFO: ret = pma_get_opa_errorinfo(pmp, ibdev, port, - resp_len); + resp_len); break; default: pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; @@ -4228,11 +4235,11 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port, switch (pmp->mad_hdr.attr_id) { case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS: ret = pma_set_opa_portstatus(pmp, ibdev, port, - resp_len); + resp_len); break; case OPA_PM_ATTRIB_ID_ERROR_INFO: ret = pma_set_opa_errorinfo(pmp, ibdev, port, - resp_len); + resp_len); break; default: pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 4d9fd3b..cbd61cf 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -217,10 +217,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev, pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl); pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl); pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, - &dd->pcie_devctl2); + &dd->pcie_devctl2); pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); - pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - &dd->pci_lnkctl3); + pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3); pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); return 0; @@ -271,7 +270,7 @@ void hfi1_pcie_flr(struct hfi1_devdata *dd) clear: pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_BCR_FLR); + PCI_EXP_DEVCTL_BCR_FLR); /* PCIe spec requires the function to be back within 100ms */ msleep(100); } @@ -377,8 +376,8 @@ int pcie_speeds(struct hfi1_devdata *dd) pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) { dd_dev_info(dd, - "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n", - linkcap & PCI_EXP_LNKCAP_SLS); + "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n", + linkcap & PCI_EXP_LNKCAP_SLS); dd->link_gen3_capable = 0; } @@ -432,19 +431,15 @@ void hfi1_enable_intx(struct pci_dev *pdev) void restore_pci_variables(struct hfi1_devdata *dd) { pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); - pci_write_config_dword(dd->pcidev, - PCI_BASE_ADDRESS_0, dd->pcibar0); - pci_write_config_dword(dd->pcidev, - PCI_BASE_ADDRESS_1, dd->pcibar1); - pci_write_config_dword(dd->pcidev, - PCI_ROM_ADDRESS, dd->pci_rom); + pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0); + pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1); + 
pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom); pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl); pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl); pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2, - dd->pcie_devctl2); + dd->pcie_devctl2); pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0); - pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - dd->pci_lnkctl3); + pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3); pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); } @@ -746,21 +741,22 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs, c0 = fs - (eq[i][PREC] / div) - (eq[i][POST] / div); c_plus1 = eq[i][POST] / div; pci_write_config_dword(pdev, PCIE_CFG_REG_PL102, - eq_value(c_minus1, c0, c_plus1)); + eq_value(c_minus1, c0, c_plus1)); /* check if these coefficients violate EQ rules */ pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105, - &violation); + &violation); if (violation & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){ if (hit_error == 0) { dd_dev_err(dd, - "Gen3 EQ Table Coefficient rule violations\n"); + "Gen3 EQ Table Coefficient rule violations\n"); dd_dev_err(dd, " prec attn post\n"); } dd_dev_err(dd, " p%02d: %02x %02x %02x\n", - i, (u32)eq[i][0], (u32)eq[i][1], (u32)eq[i][2]); + i, (u32)eq[i][0], (u32)eq[i][1], + (u32)eq[i][2]); dd_dev_err(dd, " %02x %02x %02x\n", - (u32)c_minus1, (u32)c0, (u32)c_plus1); + (u32)c_minus1, (u32)c0, (u32)c_plus1); hit_error = 1; } } @@ -815,8 +811,8 @@ static int trigger_sbr(struct hfi1_devdata *dd) list_for_each_entry(pdev, &dev->bus->devices, bus_list) if (pdev != dev) { dd_dev_err(dd, - "%s: another device is on the same bus\n", - __func__); + "%s: another device is on the same bus\n", + __func__); return -ENOTTY; } @@ -840,8 +836,8 @@ static void write_gasket_interrupt(struct hfi1_devdata *dd, int index, u16 code, u16 data) { write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (index * 8), - (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) - | ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT))); + (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) | + ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT))); } /* @@ -851,14 +847,13 @@ static void arm_gasket_logic(struct hfi1_devdata *dd) { u64 reg; - reg = (((u64)1 << dd->hfi1_id) - << ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT) - | ((u64)pcie_serdes_broadcast[dd->hfi1_id] - << ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT - | ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK - | ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK) - << ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT - ); + reg = (((u64)1 << dd->hfi1_id) << + ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT) | + ((u64)pcie_serdes_broadcast[dd->hfi1_id] << + ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT | + ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK | + ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK) << + ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT); write_csr(dd, ASIC_PCIE_SD_HOST_CMD, reg); /* read back to push the write */ read_csr(dd, ASIC_PCIE_SD_HOST_CMD); @@ -982,8 +977,8 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) /* if already at target speed, done (unless forced) */ if (dd->lbus_speed == target_speed) { dd_dev_info(dd, "%s: PCIe already at gen%d, %s\n", __func__, - pcie_target, - pcie_force ? "re-doing anyway" : "skipping"); + pcie_target, + pcie_force ? 
"re-doing anyway" : "skipping"); if (!pcie_force) return 0; } @@ -1087,8 +1082,10 @@ retry: default_pset = DEFAULT_MCP_PSET; } pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101, - (fs << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT) - | (lf << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT)); + (fs << + PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT) | + (lf << + PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT)); ret = load_eq_table(dd, eq, fs, div); if (ret) goto done; @@ -1102,15 +1099,15 @@ retry: pcie_pset = default_pset; if (pcie_pset > 10) { /* valid range is 0-10, inclusive */ dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n", - __func__, pcie_pset, default_pset); + __func__, pcie_pset, default_pset); pcie_pset = default_pset; } dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pcie_pset); pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106, - ((1 << pcie_pset) - << PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) - | PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK - | PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK); + ((1 << pcie_pset) << + PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) | + PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK | + PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK); /* * step 5b: Do post firmware download steps via SBus @@ -1165,13 +1162,13 @@ retry: parent = dd->pcidev->bus->self; pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2); dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, - (u32)lnkctl2); + (u32)lnkctl2); /* only write to parent if target is not as high as ours */ if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) { lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK; lnkctl2 |= target_vector; dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, - (u32)lnkctl2); + (u32)lnkctl2); pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2); } else { dd_dev_info(dd, "%s: ..target speed is OK\n", __func__); @@ -1180,11 +1177,11 @@ retry: dd_dev_info(dd, "%s: setting target link speed\n", __func__); pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2); dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, - (u32)lnkctl2); + (u32)lnkctl2); lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK; lnkctl2 |= target_vector; dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, - (u32)lnkctl2); + (u32)lnkctl2); pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2); /* step 5h: arm gasket logic */ @@ -1221,8 +1218,8 @@ retry: ret = pci_read_config_word(dd->pcidev, PCI_VENDOR_ID, &vendor); if (ret) { dd_dev_info(dd, - "%s: read of VendorID failed after SBR, err %d\n", - __func__, ret); + "%s: read of VendorID failed after SBR, err %d\n", + __func__, ret); return_error = 1; goto done; } @@ -1273,8 +1270,8 @@ retry: & ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_STS_MASK; if ((status & (1 << dd->hfi1_id)) == 0) { dd_dev_err(dd, - "%s: gasket status 0x%x, expecting 0x%x\n", - __func__, status, 1 << dd->hfi1_id); + "%s: gasket status 0x%x, expecting 0x%x\n", + __func__, status, 1 << dd->hfi1_id); ret = -EIO; goto done; } @@ -1291,13 +1288,13 @@ retry: /* update our link information cache */ update_lbus_info(dd); dd_dev_info(dd, "%s: new speed and width: %s\n", __func__, - dd->lbus_info); + dd->lbus_info); if (dd->lbus_speed != target_speed) { /* not target */ /* maybe retry */ do_retry = retry_count < pcie_retry; dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n", - pcie_target, do_retry ? ", retrying" : ""); + pcie_target, do_retry ? 
", retrying" : ""); retry_count++; if (do_retry) { msleep(100); /* allow time to settle */ diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 191b260..a483c0a 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -511,7 +511,7 @@ static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context) sci = &dd->send_contexts[sw_index]; if (!sci->allocated) { dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n", - __func__, sw_index, hw_context); + __func__, sw_index, hw_context); } sci->allocated = 0; dd->hw_to_sw[hw_context] = INVALID_SCI; @@ -627,7 +627,7 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) & SC(CREDIT_CTRL_THRESHOLD_MASK)) << SC(CREDIT_CTRL_THRESHOLD_SHIFT)); write_kctxt_csr(sc->dd, sc->hw_context, - SC(CREDIT_CTRL), sc->credit_ctrl); + SC(CREDIT_CTRL), sc->credit_ctrl); /* force a credit return on change to avoid a possible stall */ force_return = 1; @@ -765,9 +765,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, /* set the default partition key */ write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), - (DEFAULT_PKEY & - SC(CHECK_PARTITION_KEY_VALUE_MASK)) - << SC(CHECK_PARTITION_KEY_VALUE_SHIFT)); + (DEFAULT_PKEY & + SC(CHECK_PARTITION_KEY_VALUE_MASK)) << + SC(CHECK_PARTITION_KEY_VALUE_SHIFT)); /* per context type checks */ if (type == SC_USER) { @@ -780,8 +780,8 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, /* set the send context check opcode mask and value */ write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), - ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) | - ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); + ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) | + ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); /* set up credit return */ reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); @@ -799,7 +799,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, thresh = sc_percent_to_threshold(sc, 50); } else if (type == SC_USER) { thresh = sc_percent_to_threshold(sc, - user_credit_return_threshold); + user_credit_return_threshold); } else { /* kernel */ thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize); } @@ -972,11 +972,11 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) if (loop > 500) { /* timed out - bounce the link */ dd_dev_err(dd, - "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", - __func__, sc->sw_index, - sc->hw_context, (u32)reg); + "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", + __func__, sc->sw_index, + sc->hw_context, (u32)reg); queue_work(dd->pport->hfi1_wq, - &dd->pport->link_bounce_work); + &dd->pport->link_bounce_work); break; } loop++; @@ -1022,7 +1022,7 @@ int sc_restart(struct send_context *sc) return -EINVAL; dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index, - sc->hw_context); + sc->hw_context); /* * Step 1: Wait for the context to actually halt. 
@@ -1037,7 +1037,7 @@ int sc_restart(struct send_context *sc) break; if (loop > 100) { dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n", - __func__, sc->sw_index, sc->hw_context); + __func__, sc->sw_index, sc->hw_context); return -ETIME; } loop++; @@ -1063,9 +1063,9 @@ int sc_restart(struct send_context *sc) break; if (loop > 100) { dd_dev_err(dd, - "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n", - __func__, sc->sw_index, - sc->hw_context, count); + "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n", + __func__, sc->sw_index, + sc->hw_context, count); } loop++; udelay(1); @@ -1178,18 +1178,18 @@ void pio_reset_all(struct hfi1_devdata *dd) if (ret == -EIO) { /* clear the error */ write_csr(dd, SEND_PIO_ERR_CLEAR, - SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK); + SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK); } /* reset init all */ write_csr(dd, SEND_PIO_INIT_CTXT, - SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK); + SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK); udelay(2); ret = pio_init_wait_progress(dd); if (ret < 0) { dd_dev_err(dd, - "PIO send context init %s while initializing all PIO blocks\n", - ret == -ETIMEDOUT ? "is stuck" : "had an error"); + "PIO send context init %s while initializing all PIO blocks\n", + ret == -ETIMEDOUT ? "is stuck" : "had an error"); } } @@ -1237,8 +1237,7 @@ int sc_enable(struct send_context *sc) */ reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS)); if (reg) - write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), - reg); + write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg); /* * The HW PIO initialization engine can handle only one init @@ -1296,7 +1295,7 @@ void sc_return_credits(struct send_context *sc) /* a 0->1 transition schedules a credit return */ write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), - SC(CREDIT_FORCE_FORCE_RETURN_SMASK)); + SC(CREDIT_FORCE_FORCE_RETURN_SMASK)); /* * Ensure that the write is flushed and the credit return is * scheduled. We care more about the 0 -> 1 transition. 
@@ -1322,7 +1321,7 @@ void sc_drop(struct send_context *sc) return; dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n", - __func__, sc->sw_index, sc->hw_context); + __func__, sc->sw_index, sc->hw_context); } /* @@ -1472,7 +1471,7 @@ void sc_add_credit_return_intr(struct send_context *sc) if (sc->credit_intr_count == 0) { sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK); write_kctxt_csr(sc->dd, sc->hw_context, - SC(CREDIT_CTRL), sc->credit_ctrl); + SC(CREDIT_CTRL), sc->credit_ctrl); } sc->credit_intr_count++; spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); @@ -1494,7 +1493,7 @@ void sc_del_credit_return_intr(struct send_context *sc) if (sc->credit_intr_count == 0) { sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK); write_kctxt_csr(sc->dd, sc->hw_context, - SC(CREDIT_CTRL), sc->credit_ctrl); + SC(CREDIT_CTRL), sc->credit_ctrl); } spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); } @@ -1667,7 +1666,7 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context) sw_index = dd->hw_to_sw[hw_context]; if (unlikely(sw_index >= dd->num_send_contexts)) { dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n", - __func__, hw_context, sw_index); + __func__, hw_context, sw_index); goto done; } sc = dd->send_contexts[sw_index].sc; @@ -1680,8 +1679,8 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context) sw_index = dd->hw_to_sw[gc]; if (unlikely(sw_index >= dd->num_send_contexts)) { dd_dev_err(dd, - "%s: invalid hw (%u) to sw (%u) mapping\n", - __func__, hw_context, sw_index); + "%s: invalid hw (%u) to sw (%u) mapping\n", + __func__, hw_context, sw_index); continue; } sc_release_update(dd->send_contexts[sw_index].sc); @@ -2009,8 +2008,8 @@ int init_credit_return(struct hfi1_devdata *dd) if (!dd->cr_base[i].va) { set_dev_node(&dd->pcidev->dev, dd->node); dd_dev_err(dd, - "Unable to allocate credit return DMA range for NUMA %d\n", - i); + "Unable to allocate credit return DMA range for NUMA %d\n", + i); ret = -ENOMEM; goto done; } @@ -2034,10 +2033,10 @@ void free_credit_return(struct hfi1_devdata *dd) for (i = 0; i < num_numa; i++) { if (dd->cr_base[i].va) { dma_free_coherent(&dd->pcidev->dev, - TXE_NUM_CONTEXTS - * sizeof(struct credit_return), - dd->cr_base[i].va, - dd->cr_base[i].pa); + TXE_NUM_CONTEXTS * + sizeof(struct credit_return), + dd->cr_base[i].va, + dd->cr_base[i].pa); } } kfree(dd->cr_base); diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h index 09a5eeb..d80909a 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/staging/rdma/hfi1/pio.h @@ -289,7 +289,7 @@ void sc_flush(struct send_context *sc); void sc_drop(struct send_context *sc); void sc_stop(struct send_context *sc, int bit); struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, - pio_release_cb cb, void *arg); + pio_release_cb cb, void *arg); void sc_release_update(struct send_context *sc); void sc_return_credits(struct send_context *sc); void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context); @@ -322,7 +322,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op); void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, - const void *from, size_t nbytes); + const void *from, size_t nbytes); void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes); void seg_pio_copy_end(struct pio_buf *pbuf); diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/staging/rdma/hfi1/pio_copy.c index 
6f97d22..998e7bc 100644 --- a/drivers/staging/rdma/hfi1/pio_copy.c +++ b/drivers/staging/rdma/hfi1/pio_copy.c @@ -200,7 +200,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, * o nbytes must not span a QW boundary */ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, - unsigned int nbytes) + unsigned int nbytes) { unsigned long off; @@ -227,7 +227,7 @@ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, * o nbytes may span a QW boundary */ static inline void read_extra_bytes(struct pio_buf *pbuf, - const void *from, unsigned int nbytes) + const void *from, unsigned int nbytes) { unsigned long off = (unsigned long)from & 0x7; unsigned int room, xbytes; @@ -366,7 +366,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) * o from may _not_ be u64 aligned. */ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, - unsigned int nbytes) + unsigned int nbytes) { jcopy(&pbuf->carry.val8[0], from, nbytes); pbuf->carry_bytes = nbytes; @@ -381,7 +381,7 @@ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, * o nbytes may span a QW boundary */ static inline void read_extra_bytes(struct pio_buf *pbuf, - const void *from, unsigned int nbytes) + const void *from, unsigned int nbytes) { jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes); pbuf->carry_bytes += nbytes; @@ -437,7 +437,7 @@ static inline int carry_write8(struct pio_buf *pbuf, void *dest) u64 zero = 0; jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero, - 8 - pbuf->carry_bytes); + 8 - pbuf->carry_bytes); writeq(pbuf->carry.val64, dest); return 1; } @@ -457,7 +457,7 @@ static inline int carry_write8(struct pio_buf *pbuf, void *dest) * @nbytes: bytes to copy */ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, - const void *from, size_t nbytes) + const void *from, size_t nbytes) { void __iomem *dest = pbuf->start + SOP_DISTANCE; void __iomem *send = dest + PIO_BLOCK_SIZE; @@ -647,7 +647,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) * Must handle nbytes < 8. */ static void mid_copy_straight(struct pio_buf *pbuf, - const void *from, size_t nbytes) + const void *from, size_t nbytes) { void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); void __iomem *dend; /* 8-byte data end */ diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index bdb1504..c5e04b0 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -468,7 +468,7 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, if (port_num > dd->num_pports || port_num < 1) { dd_dev_info(dd, "%s: Invalid port number %d\n", - __func__, port_num); + __func__, port_num); ret = -EINVAL; goto set_zeroes; } diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 99584f7..28ff638 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -1773,8 +1773,8 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * schedule a response to be sent. 
*/ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, - struct rvt_qp *qp, u32 opcode, u32 psn, int diff, - struct hfi1_ctxtdata *rcd) + struct rvt_qp *qp, u32 opcode, u32 psn, + int diff, struct hfi1_ctxtdata *rcd) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct rvt_ack_entry *e; diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 6f0005a..e2c4f82 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -283,9 +283,10 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, guid)) goto err; - if (!gid_ok(&hdr->u.l.grh.sgid, - qp->alt_ah_attr.grh.dgid.global.subnet_prefix, - qp->alt_ah_attr.grh.dgid.global.interface_id)) + if (!gid_ok( + &hdr->u.l.grh.sgid, + qp->alt_ah_attr.grh.dgid.global.subnet_prefix, + qp->alt_ah_attr.grh.dgid.global.interface_id)) goto err; } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, @@ -317,9 +318,10 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, guid)) goto err; - if (!gid_ok(&hdr->u.l.grh.sgid, - qp->remote_ah_attr.grh.dgid.global.subnet_prefix, - qp->remote_ah_attr.grh.dgid.global.interface_id)) + if (!gid_ok( + &hdr->u.l.grh.sgid, + qp->remote_ah_attr.grh.dgid.global.subnet_prefix, + qp->remote_ah_attr.grh.dgid.global.interface_id)) goto err; } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 5f62d02..74086ea 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -325,9 +325,9 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde, if (lcnt++ > 500) { /* timed out - bounce the link */ dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n", - __func__, sde->this_idx, (u32)reg); + __func__, sde->this_idx, (u32)reg); queue_work(dd->pport->hfi1_wq, - &dd->pport->link_bounce_work); + &dd->pport->link_bounce_work); break; } udelay(1); @@ -458,8 +458,8 @@ static void sdma_err_halt_wait(struct work_struct *work) break; if (time_after(jiffies, timeout)) { dd_dev_err(sde->dd, - "SDMA engine %d - timeout waiting for engine to halt\n", - sde->this_idx); + "SDMA engine %d - timeout waiting for engine to halt\n", + sde->this_idx); /* * Continue anyway. This could happen if there was * an uncorrectable error in the wrong spot. 
@@ -656,7 +656,7 @@ static void sdma_start_hw_clean_up(struct sdma_engine *sde) } static void sdma_set_state(struct sdma_engine *sde, - enum sdma_states next_state) + enum sdma_states next_state) { struct sdma_state *ss = &sde->state; const struct sdma_set_state_action *action = sdma_action_table; @@ -908,7 +908,7 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines) /* newmap in hand, save old map */ spin_lock_irq(&dd->sde_map_lock); oldmap = rcu_dereference_protected(dd->sdma_map, - lockdep_is_held(&dd->sde_map_lock)); + lockdep_is_held(&dd->sde_map_lock)); /* publish newmap */ rcu_assign_pointer(dd->sdma_map, newmap); @@ -1006,16 +1006,16 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) return 0; } if (mod_num_sdma && - /* can't exceed chip support */ - mod_num_sdma <= dd->chip_sdma_engines && - /* count must be >= vls */ - mod_num_sdma >= num_vls) + /* can't exceed chip support */ + mod_num_sdma <= dd->chip_sdma_engines && + /* count must be >= vls */ + mod_num_sdma >= num_vls) num_engines = mod_num_sdma; dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma); dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines); dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n", - dd->chip_sdma_mem_size); + dd->chip_sdma_mem_size); per_sdma_credits = dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE); @@ -1026,7 +1026,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) descq_cnt = sdma_get_descq_cnt(); dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n", - num_engines, descq_cnt); + num_engines, descq_cnt); /* alloc memory for array of send engines */ dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL); @@ -1086,10 +1086,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) SDMA_DESC1_INT_REQ_FLAG; tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task, - (unsigned long)sde); + (unsigned long)sde); tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task, - (unsigned long)sde); + (unsigned long)sde); INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait); INIT_WORK(&sde->flush_worker, sdma_field_flush); @@ -1240,7 +1240,7 @@ void sdma_exit(struct hfi1_devdata *dd) sde = &dd->per_sdma[this_idx]; if (!list_empty(&sde->dmawait)) dd_dev_err(dd, "sde %u: dmawait list not empty!\n", - sde->this_idx); + sde->this_idx); sdma_process_event(sde, sdma_event_e00_go_hw_down); del_timer_sync(&sde->err_progress_check_timer); @@ -1370,9 +1370,9 @@ retry: if (unlikely(!sane)) { dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n", - sde->this_idx, - use_dmahead ? "dma" : "kreg", - hwhead, swhead, swtail, cnt); + sde->this_idx, + use_dmahead ? 
"dma" : "kreg", + hwhead, swhead, swtail, cnt); if (use_dmahead) { /* try one more time, using csr */ use_dmahead = 0; @@ -1550,10 +1550,10 @@ void sdma_engine_error(struct sdma_engine *sde, u64 status) __sdma_process_event(sde, sdma_event_e60_hw_halted); if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) { dd_dev_err(sde->dd, - "SDMA (%u) engine error: 0x%llx state %s\n", - sde->this_idx, - (unsigned long long)status, - sdma_state_names[sde->state.current_state]); + "SDMA (%u) engine error: 0x%llx state %s\n", + sde->this_idx, + (unsigned long long)status, + sdma_state_names[sde->state.current_state]); dump_sdma_state(sde); } write_sequnlock(&sde->head_lock); @@ -1597,8 +1597,8 @@ static void sdma_sendctrl(struct sdma_engine *sde, unsigned op) if (op & SDMA_SENDCTRL_OP_CLEANUP) write_sde_csr(sde, SD(CTRL), - sde->p_senddmactrl | - SD(CTRL_SDMA_CLEANUP_SMASK)); + sde->p_senddmactrl | + SD(CTRL_SDMA_CLEANUP_SMASK)); else write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl); @@ -1622,12 +1622,10 @@ static void sdma_setlengen(struct sdma_engine *sde) * generation counter. */ write_sde_csr(sde, SD(LEN_GEN), - (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT) - ); + (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)); write_sde_csr(sde, SD(LEN_GEN), - ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) - | (4ULL << SD(LEN_GEN_GENERATION_SHIFT)) - ); + ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) | + (4ULL << SD(LEN_GEN_GENERATION_SHIFT))); } static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail) @@ -1707,17 +1705,16 @@ static void init_sdma_regs( write_sde_csr(sde, SD(DESC_CNT), 0); write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys); write_sde_csr(sde, SD(MEMORY), - ((u64)credits << - SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) | - ((u64)(credits * sde->this_idx) << - SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT))); + ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) | + ((u64)(credits * sde->this_idx) << + SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT))); write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull); set_sdma_integrity(sde); opmask = OPCODE_CHECK_MASK_DISABLED; opval = OPCODE_CHECK_VAL_DISABLED; write_sde_csr(sde, SD(CHECK_OPCODE), - (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) | - (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT)); + (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) | + (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT)); } #ifdef CONFIG_SDMA_VERBOSITY @@ -1796,12 +1793,9 @@ static void dump_sdma_state(struct sdma_engine *sde) descq = sde->descq; dd_dev_err(sde->dd, - "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n", - sde->this_idx, - head, - tail, - cnt, - !list_empty(&sde->flushlist)); + "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n", + sde->this_idx, head, tail, cnt, + !list_empty(&sde->flushlist)); /* print info for each entry in the descriptor queue */ while (head != tail) { @@ -1822,20 +1816,23 @@ static void dump_sdma_state(struct sdma_engine *sde) len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT) & SDMA_DESC0_BYTE_COUNT_MASK; dd_dev_err(sde->dd, - "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n", - head, flags, addr, gen, len); + "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n", + head, flags, addr, gen, len); dd_dev_err(sde->dd, - "\tdesc0:0x%016llx desc1 0x%016llx\n", - desc[0], desc[1]); + "\tdesc0:0x%016llx desc1 0x%016llx\n", + desc[0], desc[1]); if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) dd_dev_err(sde->dd, - "\taidx: %u amode: %u alen: %u\n", - (u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK) - >> 
SDMA_DESC1_HEADER_INDEX_SHIFT), - (u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK) - >> SDMA_DESC1_HEADER_MODE_SHIFT), - (u8)((desc[1] & SDMA_DESC1_HEADER_DWS_SMASK) - >> SDMA_DESC1_HEADER_DWS_SHIFT)); + "\taidx: %u amode: %u alen: %u\n", + (u8)((desc[1] & + SDMA_DESC1_HEADER_INDEX_SMASK) >> + SDMA_DESC1_HEADER_INDEX_SHIFT), + (u8)((desc[1] & + SDMA_DESC1_HEADER_MODE_SMASK) >> + SDMA_DESC1_HEADER_MODE_SHIFT), + (u8)((desc[1] & + SDMA_DESC1_HEADER_DWS_SMASK) >> + SDMA_DESC1_HEADER_DWS_SHIFT)); head++; head &= sde->sdma_mask; } @@ -1862,29 +1859,26 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde) head = sde->descq_head & sde->sdma_mask; tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask; seq_printf(s, SDE_FMT, sde->this_idx, - sde->cpu, - sdma_state_name(sde->state.current_state), - (unsigned long long)read_sde_csr(sde, SD(CTRL)), - (unsigned long long)read_sde_csr(sde, SD(STATUS)), - (unsigned long long)read_sde_csr(sde, - SD(ENG_ERR_STATUS)), - (unsigned long long)read_sde_csr(sde, SD(TAIL)), - tail, - (unsigned long long)read_sde_csr(sde, SD(HEAD)), - head, - (unsigned long long)le64_to_cpu(*sde->head_dma), - (unsigned long long)read_sde_csr(sde, SD(MEMORY)), - (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)), - (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)), - (unsigned long long)sde->last_status, - (unsigned long long)sde->ahg_bits, - sde->tx_tail, - sde->tx_head, - sde->descq_tail, - sde->descq_head, + sde->cpu, + sdma_state_name(sde->state.current_state), + (unsigned long long)read_sde_csr(sde, SD(CTRL)), + (unsigned long long)read_sde_csr(sde, SD(STATUS)), + (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)), + (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail, + (unsigned long long)read_sde_csr(sde, SD(HEAD)), head, + (unsigned long long)le64_to_cpu(*sde->head_dma), + (unsigned long long)read_sde_csr(sde, SD(MEMORY)), + (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)), + (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)), + (unsigned long long)sde->last_status, + (unsigned long long)sde->ahg_bits, + sde->tx_tail, + sde->tx_head, + sde->descq_tail, + sde->descq_head, !list_empty(&sde->flushlist), - sde->descq_full_count, - (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID)); + sde->descq_full_count, + (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID)); /* print info for each entry in the descriptor queue */ while (head != tail) { @@ -1905,14 +1899,16 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde) len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT) & SDMA_DESC0_BYTE_COUNT_MASK; seq_printf(s, - "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n", - head, flags, addr, gen, len); + "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n", + head, flags, addr, gen, len); if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n", - (u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK) - >> SDMA_DESC1_HEADER_INDEX_SHIFT), - (u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK) - >> SDMA_DESC1_HEADER_MODE_SHIFT)); + (u8)((desc[1] & + SDMA_DESC1_HEADER_INDEX_SMASK) >> + SDMA_DESC1_HEADER_INDEX_SHIFT), + (u8)((desc[1] & + SDMA_DESC1_HEADER_MODE_SMASK) >> + SDMA_DESC1_HEADER_MODE_SHIFT)); head = (head + 1) & sde->sdma_mask; } } @@ -2108,9 +2104,8 @@ nodesc: * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state */ -int sdma_send_txlist(struct sdma_engine *sde, - struct iowait *wait, - struct list_head 
*tx_list) +int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, + struct list_head *tx_list) { struct sdma_txreq *tx, *tx_next; int ret = 0; @@ -2178,8 +2173,7 @@ nodesc: goto update_tail; } -static void sdma_process_event(struct sdma_engine *sde, - enum sdma_events event) +static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event) { unsigned long flags; @@ -2196,7 +2190,7 @@ static void sdma_process_event(struct sdma_engine *sde, } static void __sdma_process_event(struct sdma_engine *sde, - enum sdma_events event) + enum sdma_events event) { struct sdma_state *ss = &sde->state; int need_progress = 0; @@ -2227,7 +2221,7 @@ static void __sdma_process_event(struct sdma_engine *sde, /* This reference means the state machine is started */ sdma_get(&sde->state); sdma_set_state(sde, - sdma_state_s10_hw_start_up_halt_wait); + sdma_state_s10_hw_start_up_halt_wait); break; case sdma_event_e15_hw_halt_done: break; @@ -2265,7 +2259,7 @@ static void __sdma_process_event(struct sdma_engine *sde, break; case sdma_event_e15_hw_halt_done: sdma_set_state(sde, - sdma_state_s15_hw_start_up_clean_wait); + sdma_state_s15_hw_start_up_clean_wait); sdma_start_hw_clean_up(sde); break; case sdma_event_e25_hw_clean_up_done: @@ -3003,7 +2997,8 @@ void sdma_freeze(struct hfi1_devdata *dd) * continuing. */ ret = wait_event_interruptible(dd->sdma_unfreeze_wq, - atomic_read(&dd->sdma_unfreeze_count) <= 0); + atomic_read(&dd->sdma_unfreeze_count) <= + 0); /* interrupted or count is negative, then unloading - just exit */ if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0) return; @@ -3040,7 +3035,7 @@ void sdma_unfreeze(struct hfi1_devdata *dd) /* tell all engines start freeze clean up */ for (i = 0; i < dd->num_sdma; i++) sdma_process_event(&dd->per_sdma[i], - sdma_event_e82_hw_unfreeze); + sdma_event_e82_hw_unfreeze); } /** @@ -3054,5 +3049,6 @@ void _sdma_engine_progress_schedule( trace_hfi1_sdma_engine_progress(sde, sde->progress_mask); /* assume we have selected a good cpu */ write_csr(sde->dd, - CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)), sde->progress_mask); + CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)), + sde->progress_mask); } diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index fe232c1..3e3f180 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -61,8 +61,8 @@ * Congestion control table size followed by table entries */ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { int ret; struct hfi1_pportdata *ppd = @@ -110,8 +110,8 @@ static struct bin_attribute cc_table_bin_attr = { * trigger threshold and the minimum injection rate delay. 
*/ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { int ret; struct hfi1_pportdata *ppd = @@ -550,7 +550,7 @@ static ssize_t show_nctxts(struct device *device, } static ssize_t show_nfreectxts(struct device *device, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); @@ -660,8 +660,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, if (!port_num || port_num > dd->num_pports) { dd_dev_err(dd, - "Skipping infiniband class with invalid port %u\n", - port_num); + "Skipping infiniband class with invalid port %u\n", + port_num); return -ENODEV; } ppd = &dd->pport[port_num - 1]; @@ -700,34 +700,32 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, kobj, "CCMgtA"); if (ret) { dd_dev_err(dd, - "Skipping Congestion Control sysfs info, (err %d) port %u\n", - ret, port_num); + "Skipping Congestion Control sysfs info, (err %d) port %u\n", + ret, port_num); goto bail_vl2mtu; } kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); - ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, - &cc_setting_bin_attr); + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr); if (ret) { dd_dev_err(dd, - "Skipping Congestion Control setting sysfs info, (err %d) port %u\n", - ret, port_num); + "Skipping Congestion Control setting sysfs info, (err %d) port %u\n", + ret, port_num); goto bail_cc; } - ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, - &cc_table_bin_attr); + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_table_bin_attr); if (ret) { dd_dev_err(dd, - "Skipping Congestion Control table sysfs info, (err %d) port %u\n", - ret, port_num); + "Skipping Congestion Control table sysfs info, (err %d) port %u\n", + ret, port_num); goto bail_cc_entry_bin; } dd_dev_info(dd, - "IB%u: Congestion Control Agent enabled for port %d\n", - dd->unit, port_num); + "IB%u: Congestion Control Agent enabled for port %d\n", + dd->unit, port_num); return 0; diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c index 923ca55..99fd017 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/staging/rdma/hfi1/trace.c @@ -109,17 +109,17 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE): case OP(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE): trace_seq_printf(p, IMM_PRN, - be32_to_cpu(eh->imm_data)); + be32_to_cpu(eh->imm_data)); break; /* reth + imm */ case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, RETH_PRN " " IMM_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), - be32_to_cpu(eh->rc.reth.rkey), - be32_to_cpu(eh->rc.reth.length), - be32_to_cpu(eh->rc.imm_data)); + (unsigned long long)ib_u64_get( + (__be32 *)&eh->rc.reth.vaddr), + be32_to_cpu(eh->rc.reth.rkey), + be32_to_cpu(eh->rc.reth.length), + be32_to_cpu(eh->rc.imm_data)); break; /* reth */ case OP(RC, RDMA_READ_REQUEST): @@ -128,10 +128,10 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY): case OP(UC, RDMA_WRITE_ONLY): trace_seq_printf(p, RETH_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), - be32_to_cpu(eh->rc.reth.rkey), - be32_to_cpu(eh->rc.reth.length)); + (unsigned long long)ib_u64_get( + (__be32 *)&eh->rc.reth.vaddr), + be32_to_cpu(eh->rc.reth.rkey), + 
be32_to_cpu(eh->rc.reth.length)); break; case OP(RC, RDMA_READ_RESPONSE_FIRST): case OP(RC, RDMA_READ_RESPONSE_LAST): @@ -154,19 +154,20 @@ const char *parse_everbs_hdrs( case OP(RC, COMPARE_SWAP): case OP(RC, FETCH_ADD): trace_seq_printf(p, ATOMICETH_PRN, - (unsigned long long)ib_u64_get(eh->atomic_eth.vaddr), - eh->atomic_eth.rkey, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.swap_data), - (unsigned long long)ib_u64_get( + (unsigned long long)ib_u64_get( + eh->atomic_eth.vaddr), + eh->atomic_eth.rkey, + (unsigned long long)ib_u64_get( + (__be32 *)&eh->atomic_eth.swap_data), + (unsigned long long)ib_u64_get( (__be32 *)&eh->atomic_eth.compare_data)); break; /* deth */ case OP(UD, SEND_ONLY): case OP(UD, SEND_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, DETH_PRN, - be32_to_cpu(eh->ud.deth[0]), - be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); + be32_to_cpu(eh->ud.deth[0]), + be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); break; } trace_seq_putc(p, 0); @@ -187,12 +188,12 @@ const char *parse_sdma_flags( trace_seq_printf(p, "%s", flags); if (desc0 & SDMA_DESC0_FIRST_DESC_FLAG) trace_seq_printf(p, " amode:%u aidx:%u alen:%u", - (u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT) - & SDMA_DESC1_HEADER_MODE_MASK), - (u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT) - & SDMA_DESC1_HEADER_INDEX_MASK), - (u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT) - & SDMA_DESC1_HEADER_DWS_MASK)); + (u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT) & + SDMA_DESC1_HEADER_MODE_MASK), + (u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT) & + SDMA_DESC1_HEADER_INDEX_MASK), + (u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT) & + SDMA_DESC1_HEADER_DWS_MASK)); return ret; } diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index a13215f..dfa9967 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -76,81 +76,77 @@ __print_symbolic(etype, \ #define TRACE_SYSTEM hfi1_rx TRACE_EVENT(hfi1_rcvhdr, - TP_PROTO(struct hfi1_devdata *dd, - u64 eflags, - u32 ctxt, - u32 etype, - u32 hlen, - u32 tlen, - u32 updegr, - u32 etail), - TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u64, eflags) - __field(u32, ctxt) - __field(u32, etype) - __field(u32, hlen) - __field(u32, tlen) - __field(u32, updegr) - __field(u32, etail) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->eflags = eflags; - __entry->ctxt = ctxt; - __entry->etype = etype; - __entry->hlen = hlen; - __entry->tlen = tlen; - __entry->updegr = updegr; - __entry->etail = etail; - ), - TP_printk( -"[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", - __get_str(dev), - __entry->ctxt, - __entry->eflags, - __entry->etype, show_packettype(__entry->etype), - __entry->hlen, - __entry->tlen, - __entry->updegr, - __entry->etail - ) + TP_PROTO(struct hfi1_devdata *dd, + u64 eflags, + u32 ctxt, + u32 etype, + u32 hlen, + u32 tlen, + u32 updegr, + u32 etail + ), + TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u64, eflags) + __field(u32, ctxt) + __field(u32, etype) + __field(u32, hlen) + __field(u32, tlen) + __field(u32, updegr) + __field(u32, etail) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->eflags = eflags; + __entry->ctxt = ctxt; + __entry->etype = etype; + __entry->hlen = hlen; + __entry->tlen = tlen; + __entry->updegr = updegr; + __entry->etail = etail; + ), + TP_printk( + "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", + __get_str(dev), + 
__entry->ctxt, + __entry->eflags, + __entry->etype, show_packettype(__entry->etype), + __entry->hlen, + __entry->tlen, + __entry->updegr, + __entry->etail + ) ); TRACE_EVENT(hfi1_receive_interrupt, - TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), - TP_ARGS(dd, ctxt), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u32, ctxt) - __field(u8, slow_path) - __field(u8, dma_rtail) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt) { - __entry->slow_path = 1; - __entry->dma_rtail = 0xFF; - } else if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt_dma_rtail){ - __entry->dma_rtail = 1; - __entry->slow_path = 0; - } else if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt_nodma_rtail) { - __entry->dma_rtail = 0; - __entry->slow_path = 0; - } - ), - TP_printk( - "[%s] ctxt %d SlowPath: %d DmaRtail: %d", - __get_str(dev), - __entry->ctxt, - __entry->slow_path, - __entry->dma_rtail - ) + TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), + TP_ARGS(dd, ctxt), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u32, ctxt) + __field(u8, slow_path) + __field(u8, dma_rtail) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt) { + __entry->slow_path = 1; + __entry->dma_rtail = 0xFF; + } else if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt_dma_rtail){ + __entry->dma_rtail = 1; + __entry->slow_path = 0; + } else if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt_nodma_rtail) { + __entry->dma_rtail = 0; + __entry->slow_path = 0; + } + ), + TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", + __get_str(dev), + __entry->ctxt, + __entry->slow_path, + __entry->dma_rtail + ) ); TRACE_EVENT(hfi1_exp_tid_reg, @@ -281,78 +277,72 @@ TRACE_EVENT(hfi1_mmu_invalidate, #define TRACE_SYSTEM hfi1_tx TRACE_EVENT(hfi1_piofree, - TP_PROTO(struct send_context *sc, int extra), - TP_ARGS(sc, extra), - TP_STRUCT__entry( - DD_DEV_ENTRY(sc->dd) - __field(u32, sw_index) - __field(u32, hw_context) - __field(int, extra) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sc->dd); - __entry->sw_index = sc->sw_index; - __entry->hw_context = sc->hw_context; - __entry->extra = extra; - ), - TP_printk( - "[%s] ctxt %u(%u) extra %d", - __get_str(dev), - __entry->sw_index, - __entry->hw_context, - __entry->extra - ) + TP_PROTO(struct send_context *sc, int extra), + TP_ARGS(sc, extra), + TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) + __field(u32, sw_index) + __field(u32, hw_context) + __field(int, extra) + ), + TP_fast_assign(DD_DEV_ASSIGN(sc->dd); + __entry->sw_index = sc->sw_index; + __entry->hw_context = sc->hw_context; + __entry->extra = extra; + ), + TP_printk("[%s] ctxt %u(%u) extra %d", + __get_str(dev), + __entry->sw_index, + __entry->hw_context, + __entry->extra + ) ); TRACE_EVENT(hfi1_wantpiointr, - TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl), - TP_ARGS(sc, needint, credit_ctrl), - TP_STRUCT__entry( - DD_DEV_ENTRY(sc->dd) - __field(u32, sw_index) - __field(u32, hw_context) - __field(u32, needint) - __field(u64, credit_ctrl) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sc->dd); - __entry->sw_index = sc->sw_index; - __entry->hw_context = sc->hw_context; - __entry->needint = needint; - __entry->credit_ctrl = credit_ctrl; - ), - TP_printk( - "[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx", - __get_str(dev), - __entry->sw_index, - __entry->hw_context, - __entry->needint, - (unsigned long long)__entry->credit_ctrl - ) + TP_PROTO(struct send_context *sc, u32 
needint, u64 credit_ctrl), + TP_ARGS(sc, needint, credit_ctrl), + TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) + __field(u32, sw_index) + __field(u32, hw_context) + __field(u32, needint) + __field(u64, credit_ctrl) + ), + TP_fast_assign(DD_DEV_ASSIGN(sc->dd); + __entry->sw_index = sc->sw_index; + __entry->hw_context = sc->hw_context; + __entry->needint = needint; + __entry->credit_ctrl = credit_ctrl; + ), + TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx", + __get_str(dev), + __entry->sw_index, + __entry->hw_context, + __entry->needint, + (unsigned long long)__entry->credit_ctrl + ) ); DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, - TP_PROTO(struct rvt_qp *qp, u32 flags), - TP_ARGS(qp, flags), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, flags) - __field(u32, s_flags) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) - __entry->flags = flags; - __entry->qpn = qp->ibqp.qp_num; - __entry->s_flags = qp->s_flags; - ), - TP_printk( - "[%s] qpn 0x%x flags 0x%x s_flags 0x%x", - __get_str(dev), - __entry->qpn, - __entry->flags, - __entry->s_flags - ) + TP_PROTO(struct rvt_qp *qp, u32 flags), + TP_ARGS(qp, flags), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, flags) + __field(u32, s_flags) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->flags = flags; + __entry->qpn = qp->ibqp.qp_num; + __entry->s_flags = qp->s_flags; + ), + TP_printk( + "[%s] qpn 0x%x flags 0x%x s_flags 0x%x", + __get_str(dev), + __entry->qpn, + __entry->flags, + __entry->s_flags + ) ); DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup, @@ -367,16 +357,11 @@ DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, #define TRACE_SYSTEM hfi1_ibhdrs u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); -const char *parse_everbs_hdrs( - struct trace_seq *p, - u8 opcode, - void *ehdrs); +const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) -const char *parse_sdma_flags( - struct trace_seq *p, - u64 desc0, u64 desc1); +const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1); #define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1) @@ -433,117 +418,115 @@ __print_symbolic(opcode, \ #define EHDR_PRN "%s" DECLARE_EVENT_CLASS(hfi1_ibhdr_template, - TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - /* LRH */ - __field(u8, vl) - __field(u8, lver) - __field(u8, sl) - __field(u8, lnh) - __field(u16, dlid) - __field(u16, len) - __field(u16, slid) - /* BTH */ - __field(u8, opcode) - __field(u8, se) - __field(u8, m) - __field(u8, pad) - __field(u8, tver) - __field(u16, pkey) - __field(u8, f) - __field(u8, b) - __field(u32, qpn) - __field(u8, a) - __field(u32, psn) - /* extended headers */ - __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) - ), - TP_fast_assign( - struct hfi1_other_headers *ohdr; - - DD_DEV_ASSIGN(dd); - /* LRH */ - __entry->vl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); - __entry->lver = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; - __entry->sl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->lnh = - (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - __entry->dlid = - be16_to_cpu(hdr->lrh[1]); - /* allow for larger len */ - __entry->len = - be16_to_cpu(hdr->lrh[2]); - __entry->slid = - be16_to_cpu(hdr->lrh[3]); - /* BTH */ - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = 
&hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - __entry->opcode = - (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->se = - (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; - __entry->m = - (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; - __entry->pad = - (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - __entry->tver = - (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; - __entry->pkey = - be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) - & HFI1_FECN_MASK; - __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) - & HFI1_BECN_MASK; - __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->a = - (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; - /* allow for larger PSN */ - __entry->psn = - be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; - /* extended headers */ - memcpy( - __get_dynamic_array(ehdrs), - &ohdr->u, - ibhdr_exhdr_len(hdr)); - ), - TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, - __get_str(dev), - /* LRH */ - __entry->vl, - __entry->lver, - __entry->sl, - __entry->lnh, show_lnh(__entry->lnh), - __entry->dlid, - __entry->len, - __entry->slid, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->m, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->f, - __entry->b, - __entry->qpn, - __entry->a, - __entry->psn, - /* extended headers */ - __parse_ib_ehdrs( - __entry->opcode, - (void *)__get_dynamic_array(ehdrs)) - ) + TP_PROTO(struct hfi1_devdata *dd, + struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + /* LRH */ + __field(u8, vl) + __field(u8, lver) + __field(u8, sl) + __field(u8, lnh) + __field(u16, dlid) + __field(u16, len) + __field(u16, slid) + /* BTH */ + __field(u8, opcode) + __field(u8, se) + __field(u8, m) + __field(u8, pad) + __field(u8, tver) + __field(u16, pkey) + __field(u8, f) + __field(u8, b) + __field(u32, qpn) + __field(u8, a) + __field(u32, psn) + /* extended headers */ + __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) + ), + TP_fast_assign( + struct hfi1_other_headers *ohdr; + + DD_DEV_ASSIGN(dd); + /* LRH */ + __entry->vl = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); + __entry->lver = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; + __entry->sl = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; + __entry->lnh = + (u8)(be16_to_cpu(hdr->lrh[0]) & 3); + __entry->dlid = + be16_to_cpu(hdr->lrh[1]); + /* allow for larger len */ + __entry->len = + be16_to_cpu(hdr->lrh[2]); + __entry->slid = + be16_to_cpu(hdr->lrh[3]); + /* BTH */ + if (__entry->lnh == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + __entry->opcode = + (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; + __entry->se = + (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; + __entry->m = + (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; + __entry->pad = + (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + __entry->tver = + (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; + __entry->pkey = + be32_to_cpu(ohdr->bth[0]) & 0xffff; + __entry->f = + (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & + HFI1_FECN_MASK; + __entry->b = + (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & + HFI1_BECN_MASK; + __entry->qpn = + be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + __entry->a = + (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; + /* allow for larger PSN */ + __entry->psn = + be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; + /* extended headers */ + memcpy(__get_dynamic_array(ehdrs), &ohdr->u, + ibhdr_exhdr_len(hdr)); + ), + TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, + __get_str(dev), + /* LRH */ + __entry->vl, + __entry->lver, + __entry->sl, + 
__entry->lnh, show_lnh(__entry->lnh), + __entry->dlid, + __entry->len, + __entry->slid, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->m, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->f, + __entry->b, + __entry->qpn, + __entry->a, + __entry->psn, + /* extended headers */ + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) ); DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, @@ -562,13 +545,13 @@ DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr, #define TRACE_SYSTEM hfi1_snoop TRACE_EVENT(snoop_capture, - TP_PROTO(struct hfi1_devdata *dd, - int hdr_len, - struct hfi1_ib_header *hdr, - int data_len, - void *data), - TP_ARGS(dd, hdr_len, hdr, data_len, data), - TP_STRUCT__entry( + TP_PROTO(struct hfi1_devdata *dd, + int hdr_len, + struct hfi1_ib_header *hdr, + int data_len, + void *data), + TP_ARGS(dd, hdr_len, hdr, data_len, data), + TP_STRUCT__entry( DD_DEV_ENTRY(dd) __field(u16, slid) __field(u16, dlid) @@ -581,8 +564,8 @@ TRACE_EVENT(snoop_capture, __field(u8, lnh) __dynamic_array(u8, raw_hdr, hdr_len) __dynamic_array(u8, raw_pkt, data_len) - ), - TP_fast_assign( + ), + TP_fast_assign( struct hfi1_other_headers *ohdr; __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); @@ -601,8 +584,9 @@ TRACE_EVENT(snoop_capture, __entry->data_len = data_len; memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); memcpy(__get_dynamic_array(raw_pkt), data, data_len); - ), - TP_printk("[%s] " SNOOP_PRN, + ), + TP_printk( + "[%s] " SNOOP_PRN, __get_str(dev), __entry->slid, __entry->dlid, @@ -613,7 +597,7 @@ TRACE_EVENT(snoop_capture, __entry->pkey, __entry->hdr_len, __entry->data_len - ) + ) ); #undef TRACE_SYSTEM @@ -625,41 +609,39 @@ TRACE_EVENT(snoop_capture, TRACE_EVENT(hfi1_uctxtdata, TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), TP_ARGS(dd, uctxt), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(unsigned, ctxt) - __field(u32, credits) - __field(u64, hw_free) - __field(u64, piobase) - __field(u16, rcvhdrq_cnt) - __field(u64, rcvhdrq_phys) - __field(u32, eager_cnt) - __field(u64, rcvegr_phys) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = uctxt->ctxt; - __entry->credits = uctxt->sc->credits; - __entry->hw_free = (u64)uctxt->sc->hw_free; - __entry->piobase = (u64)uctxt->sc->base_addr; - __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; - __entry->eager_cnt = uctxt->egrbufs.alloced; - __entry->rcvegr_phys = uctxt->egrbufs.rcvtids[0].phys; - ), - TP_printk( - "[%s] ctxt %u " UCTXT_FMT, - __get_str(dev), - __entry->ctxt, - __entry->credits, - __entry->hw_free, - __entry->piobase, - __entry->rcvhdrq_cnt, - __entry->rcvhdrq_phys, - __entry->eager_cnt, - __entry->rcvegr_phys - ) - ); + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(unsigned, ctxt) + __field(u32, credits) + __field(u64, hw_free) + __field(u64, piobase) + __field(u16, rcvhdrq_cnt) + __field(u64, rcvhdrq_phys) + __field(u32, eager_cnt) + __field(u64, rcvegr_phys) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = uctxt->ctxt; + __entry->credits = uctxt->sc->credits; + __entry->hw_free = (u64)uctxt->sc->hw_free; + __entry->piobase = (u64)uctxt->sc->base_addr; + __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; + __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; + __entry->eager_cnt = uctxt->egrbufs.alloced; + __entry->rcvegr_phys = + uctxt->egrbufs.rcvtids[0].phys; + ), + TP_printk("[%s] ctxt %u " UCTXT_FMT, + __get_str(dev), + __entry->ctxt, + __entry->credits, + __entry->hw_free, + __entry->piobase, + 
__entry->rcvhdrq_cnt, + __entry->rcvhdrq_phys, + __entry->eager_cnt, + __entry->rcvegr_phys + ) +); #define CINFO_FMT \ "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u" @@ -667,38 +649,35 @@ TRACE_EVENT(hfi1_ctxt_info, TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt, struct hfi1_ctxt_info cinfo), TP_ARGS(dd, ctxt, subctxt, cinfo), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(unsigned, ctxt) - __field(unsigned, subctxt) - __field(u16, egrtids) - __field(u16, rcvhdrq_cnt) - __field(u16, rcvhdrq_size) - __field(u16, sdma_ring_size) - __field(u32, rcvegr_size) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->egrtids = cinfo.egrtids; - __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt; - __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize; - __entry->sdma_ring_size = cinfo.sdma_ring_size; - __entry->rcvegr_size = cinfo.rcvegr_size; - ), - TP_printk( - "[%s] ctxt %u:%u " CINFO_FMT, - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->egrtids, - __entry->rcvegr_size, - __entry->rcvhdrq_cnt, - __entry->rcvhdrq_size, - __entry->sdma_ring_size - ) - ); + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(unsigned, ctxt) + __field(unsigned, subctxt) + __field(u16, egrtids) + __field(u16, rcvhdrq_cnt) + __field(u16, rcvhdrq_size) + __field(u16, sdma_ring_size) + __field(u32, rcvegr_size) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->egrtids = cinfo.egrtids; + __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt; + __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize; + __entry->sdma_ring_size = cinfo.sdma_ring_size; + __entry->rcvegr_size = cinfo.rcvegr_size; + ), + TP_printk("[%s] ctxt %u:%u " CINFO_FMT, + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->egrtids, + __entry->rcvegr_size, + __entry->rcvhdrq_cnt, + __entry->rcvhdrq_size, + __entry->sdma_ring_size + ) +); #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_sma @@ -712,49 +691,46 @@ TRACE_EVENT(hfi1_ctxt_info, ) DECLARE_EVENT_CLASS(hfi1_bct_template, - TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), - TP_ARGS(dd, bc), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __dynamic_array(u8, bct, sizeof(*bc)) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - memcpy( - __get_dynamic_array(bct), - bc, - sizeof(*bc)); - ), - TP_printk(BCT_FORMAT, - BCT(overall_shared_limit), - - BCT(vl[0].dedicated), - BCT(vl[0].shared), - - BCT(vl[1].dedicated), - BCT(vl[1].shared), - - BCT(vl[2].dedicated), - BCT(vl[2].shared), - - BCT(vl[3].dedicated), - BCT(vl[3].shared), - - BCT(vl[4].dedicated), - BCT(vl[4].shared), - - BCT(vl[5].dedicated), - BCT(vl[5].shared), - - BCT(vl[6].dedicated), - BCT(vl[6].shared), - - BCT(vl[7].dedicated), - BCT(vl[7].shared), - - BCT(vl[15].dedicated), - BCT(vl[15].shared) - ) + TP_PROTO(struct hfi1_devdata *dd, + struct buffer_control *bc), + TP_ARGS(dd, bc), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __dynamic_array(u8, bct, sizeof(*bc)) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + memcpy(__get_dynamic_array(bct), bc, + sizeof(*bc)); + ), + TP_printk(BCT_FORMAT, + BCT(overall_shared_limit), + + BCT(vl[0].dedicated), + BCT(vl[0].shared), + + BCT(vl[1].dedicated), + BCT(vl[1].shared), + + BCT(vl[2].dedicated), + BCT(vl[2].shared), + + BCT(vl[3].dedicated), + BCT(vl[3].shared), + + BCT(vl[4].dedicated), + BCT(vl[4].shared), + + BCT(vl[5].dedicated), + BCT(vl[5].shared), + + BCT(vl[6].dedicated), + BCT(vl[6].shared), + + BCT(vl[7].dedicated), + BCT(vl[7].shared), + + BCT(vl[15].dedicated), + 
BCT(vl[15].shared) + ) ); DEFINE_EVENT(hfi1_bct_template, bct_set, @@ -769,252 +745,209 @@ DEFINE_EVENT(hfi1_bct_template, bct_get, #define TRACE_SYSTEM hfi1_sdma TRACE_EVENT(hfi1_sdma_descriptor, - TP_PROTO( - struct sdma_engine *sde, - u64 desc0, - u64 desc1, - u16 e, - void *descp), + TP_PROTO(struct sdma_engine *sde, + u64 desc0, + u64 desc1, + u16 e, + void *descp), TP_ARGS(sde, desc0, desc1, e, descp), - TP_STRUCT__entry( - DD_DEV_ENTRY(sde->dd) - __field(void *, descp) - __field(u64, desc0) - __field(u64, desc1) - __field(u16, e) - __field(u8, idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sde->dd); - __entry->desc0 = desc0; - __entry->desc1 = desc1; - __entry->idx = sde->this_idx; - __entry->descp = descp; - __entry->e = e; - ), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(void *, descp) + __field(u64, desc0) + __field(u64, desc1) + __field(u16, e) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->desc0 = desc0; + __entry->desc1 = desc1; + __entry->idx = sde->this_idx; + __entry->descp = descp; + __entry->e = e; + ), TP_printk( - "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u", - __get_str(dev), - __entry->idx, - __parse_sdma_flags(__entry->desc0, __entry->desc1), - (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) - & SDMA_DESC0_PHY_ADDR_MASK, - (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) - & SDMA_DESC1_GENERATION_MASK), - (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) - & SDMA_DESC0_BYTE_COUNT_MASK), - __entry->desc0, - __entry->desc1, - __entry->descp, - __entry->e - ) + "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u", + __get_str(dev), + __entry->idx, + __parse_sdma_flags(__entry->desc0, __entry->desc1), + (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) & + SDMA_DESC0_PHY_ADDR_MASK, + (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) & + SDMA_DESC1_GENERATION_MASK), + (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) & + SDMA_DESC0_BYTE_COUNT_MASK), + __entry->desc0, + __entry->desc1, + __entry->descp, + __entry->e + ) ); TRACE_EVENT(hfi1_sdma_engine_select, - TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx), - TP_ARGS(dd, sel, vl, idx), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u32, sel) - __field(u8, vl) - __field(u8, idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->sel = sel; - __entry->vl = vl; - __entry->idx = idx; - ), - TP_printk( - "[%s] selecting SDE %u sel 0x%x vl %u", - __get_str(dev), - __entry->idx, - __entry->sel, - __entry->vl - ) + TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx), + TP_ARGS(dd, sel, vl, idx), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u32, sel) + __field(u8, vl) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->sel = sel; + __entry->vl = vl; + __entry->idx = idx; + ), + TP_printk("[%s] selecting SDE %u sel 0x%x vl %u", + __get_str(dev), + __entry->idx, + __entry->sel, + __entry->vl + ) ); DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, - TP_PROTO( - struct sdma_engine *sde, - u64 status - ), - TP_ARGS(sde, status), - TP_STRUCT__entry( - DD_DEV_ENTRY(sde->dd) - __field(u64, status) - __field(u8, idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sde->dd); - __entry->status = status; - __entry->idx = sde->this_idx; - ), - TP_printk( - "[%s] SDE(%u) status %llx", - __get_str(dev), - __entry->idx, - (unsigned long long)__entry->status - ) + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, status) + 
__field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->status = status; + __entry->idx = sde->this_idx; + ), + TP_printk("[%s] SDE(%u) status %llx", + __get_str(dev), + __entry->idx, + (unsigned long long)__entry->status + ) ); DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt, - TP_PROTO( - struct sdma_engine *sde, - u64 status - ), - TP_ARGS(sde, status) + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status) ); DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress, - TP_PROTO( - struct sdma_engine *sde, - u64 status - ), - TP_ARGS(sde, status) + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status) ); DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad, - TP_PROTO( - struct sdma_engine *sde, - int aidx - ), - TP_ARGS(sde, aidx), - TP_STRUCT__entry( - DD_DEV_ENTRY(sde->dd) - __field(int, aidx) - __field(u8, idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sde->dd); - __entry->idx = sde->this_idx; - __entry->aidx = aidx; - ), - TP_printk( - "[%s] SDE(%u) aidx %d", - __get_str(dev), - __entry->idx, - __entry->aidx - ) + TP_PROTO(struct sdma_engine *sde, int aidx), + TP_ARGS(sde, aidx), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(int, aidx) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->idx = sde->this_idx; + __entry->aidx = aidx; + ), + TP_printk("[%s] SDE(%u) aidx %d", + __get_str(dev), + __entry->idx, + __entry->aidx + ) ); DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate, - TP_PROTO( - struct sdma_engine *sde, - int aidx - ), + TP_PROTO(struct sdma_engine *sde, int aidx), TP_ARGS(sde, aidx)); DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate, - TP_PROTO( - struct sdma_engine *sde, - int aidx - ), + TP_PROTO(struct sdma_engine *sde, int aidx), TP_ARGS(sde, aidx)); #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER TRACE_EVENT(hfi1_sdma_progress, - TP_PROTO( - struct sdma_engine *sde, - u16 hwhead, - u16 swhead, - struct sdma_txreq *txp - ), - TP_ARGS(sde, hwhead, swhead, txp), - TP_STRUCT__entry( - DD_DEV_ENTRY(sde->dd) - __field(u64, sn) - __field(u16, hwhead) - __field(u16, swhead) - __field(u16, txnext) - __field(u16, tx_tail) - __field(u16, tx_head) - __field(u8, idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sde->dd); - __entry->hwhead = hwhead; - __entry->swhead = swhead; - __entry->tx_tail = sde->tx_tail; - __entry->tx_head = sde->tx_head; - __entry->txnext = txp ? txp->next_descq_idx : ~0; - __entry->idx = sde->this_idx; - __entry->sn = txp ? txp->sn : ~0; - ), - TP_printk( - "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", - __get_str(dev), - __entry->idx, - __entry->sn, - __entry->hwhead, - __entry->swhead, - __entry->txnext, - __entry->tx_head, - __entry->tx_tail - ) + TP_PROTO(struct sdma_engine *sde, + u16 hwhead, + u16 swhead, + struct sdma_txreq *txp + ), + TP_ARGS(sde, hwhead, swhead, txp), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, sn) + __field(u16, hwhead) + __field(u16, swhead) + __field(u16, txnext) + __field(u16, tx_tail) + __field(u16, tx_head) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->hwhead = hwhead; + __entry->swhead = swhead; + __entry->tx_tail = sde->tx_tail; + __entry->tx_head = sde->tx_head; + __entry->txnext = txp ? txp->next_descq_idx : ~0; + __entry->idx = sde->this_idx; + __entry->sn = txp ? 
txp->sn : ~0; + ), + TP_printk( + "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", + __get_str(dev), + __entry->idx, + __entry->sn, + __entry->hwhead, + __entry->swhead, + __entry->txnext, + __entry->tx_head, + __entry->tx_tail + ) ); #else TRACE_EVENT(hfi1_sdma_progress, - TP_PROTO( - struct sdma_engine *sde, - u16 hwhead, - u16 swhead, - struct sdma_txreq *txp + TP_PROTO(struct sdma_engine *sde, + u16 hwhead, u16 swhead, + struct sdma_txreq *txp ), TP_ARGS(sde, hwhead, swhead, txp), - TP_STRUCT__entry( - DD_DEV_ENTRY(sde->dd) - __field(u16, hwhead) - __field(u16, swhead) - __field(u16, txnext) - __field(u16, tx_tail) - __field(u16, tx_head) - __field(u8, idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sde->dd); - __entry->hwhead = hwhead; - __entry->swhead = swhead; - __entry->tx_tail = sde->tx_tail; - __entry->tx_head = sde->tx_head; - __entry->txnext = txp ? txp->next_descq_idx : ~0; - __entry->idx = sde->this_idx; - ), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u16, hwhead) + __field(u16, swhead) + __field(u16, txnext) + __field(u16, tx_tail) + __field(u16, tx_head) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->hwhead = hwhead; + __entry->swhead = swhead; + __entry->tx_tail = sde->tx_tail; + __entry->tx_head = sde->tx_head; + __entry->txnext = txp ? txp->next_descq_idx : ~0; + __entry->idx = sde->this_idx; + ), TP_printk( - "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", - __get_str(dev), - __entry->idx, - __entry->hwhead, - __entry->swhead, - __entry->txnext, - __entry->tx_head, - __entry->tx_tail - ) + "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", + __get_str(dev), + __entry->idx, + __entry->hwhead, + __entry->swhead, + __entry->txnext, + __entry->tx_head, + __entry->tx_tail + ) ); #endif DECLARE_EVENT_CLASS(hfi1_sdma_sn, - TP_PROTO( - struct sdma_engine *sde, - u64 sn - ), - TP_ARGS(sde, sn), - TP_STRUCT__entry( - DD_DEV_ENTRY(sde->dd) - __field(u64, sn) - __field(u8, idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sde->dd); - __entry->sn = sn; - __entry->idx = sde->this_idx; - ), - TP_printk( - "[%s] SDE(%u) sn %llu", - __get_str(dev), - __entry->idx, - __entry->sn - ) + TP_PROTO(struct sdma_engine *sde, u64 sn), + TP_ARGS(sde, sn), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, sn) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->sn = sn; + __entry->idx = sde->this_idx; + ), + TP_printk("[%s] SDE(%u) sn %llu", + __get_str(dev), + __entry->idx, + __entry->sn + ) ); DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn, @@ -1026,10 +959,7 @@ DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn, ); DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn, - TP_PROTO( - struct sdma_engine *sde, - u64 sn - ), + TP_PROTO(struct sdma_engine *sde, u64 sn), TP_ARGS(sde, sn) ); @@ -1230,69 +1160,66 @@ TRACE_EVENT(hfi1_sdma_user_header_ahg, ); TRACE_EVENT(hfi1_sdma_state, - TP_PROTO( - struct sdma_engine *sde, - const char *cstate, - const char *nstate - ), - TP_ARGS(sde, cstate, nstate), - TP_STRUCT__entry( - DD_DEV_ENTRY(sde->dd) - __string(curstate, cstate) - __string(newstate, nstate) - ), - TP_fast_assign( - DD_DEV_ASSIGN(sde->dd); - __assign_str(curstate, cstate); - __assign_str(newstate, nstate); - ), + TP_PROTO(struct sdma_engine *sde, + const char *cstate, + const char *nstate + ), + TP_ARGS(sde, cstate, nstate), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __string(curstate, cstate) + __string(newstate, nstate) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + 
__assign_str(curstate, cstate); + __assign_str(newstate, nstate); + ), TP_printk("[%s] current state %s new state %s", - __get_str(dev), - __get_str(curstate), - __get_str(newstate) - ) + __get_str(dev), + __get_str(curstate), + __get_str(newstate) + ) ); #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rc DECLARE_EVENT_CLASS(hfi1_rc_template, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, s_flags) - __field(u32, psn) - __field(u32, s_psn) - __field(u32, s_next_psn) - __field(u32, s_sending_psn) - __field(u32, s_sending_hpsn) - __field(u32, r_psn) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->s_flags = qp->s_flags; - __entry->psn = psn; - __entry->s_psn = qp->s_psn; - __entry->s_next_psn = qp->s_next_psn; - __entry->s_sending_psn = qp->s_sending_psn; - __entry->s_sending_hpsn = qp->s_sending_hpsn; - __entry->r_psn = qp->r_psn; - ), - TP_printk( - "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", - __get_str(dev), - __entry->qpn, - __entry->s_flags, - __entry->psn, - __entry->s_psn, - __entry->s_next_psn, - __entry->s_sending_psn, - __entry->s_sending_hpsn, - __entry->r_psn - ) + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, s_flags) + __field(u32, psn) + __field(u32, s_psn) + __field(u32, s_next_psn) + __field(u32, s_sending_psn) + __field(u32, s_sending_hpsn) + __field(u32, r_psn) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->s_flags = qp->s_flags; + __entry->psn = psn; + __entry->s_psn = qp->s_psn; + __entry->s_next_psn = qp->s_next_psn; + __entry->s_sending_psn = qp->s_sending_psn; + __entry->s_sending_hpsn = qp->s_sending_hpsn; + __entry->r_psn = qp->r_psn; + ), + TP_printk( + "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", + __get_str(dev), + __entry->qpn, + __entry->s_flags, + __entry->psn, + __entry->s_psn, + __entry->s_next_psn, + __entry->s_sending_psn, + __entry->s_sending_hpsn, + __entry->r_psn + ) ); DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete, @@ -1319,21 +1246,20 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error, #define TRACE_SYSTEM hfi1_misc TRACE_EVENT(hfi1_interrupt, - TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry, - int src), - TP_ARGS(dd, is_entry, src), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __array(char, buf, 64) - __field(int, src) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd) - is_entry->is_name(__entry->buf, 64, src - is_entry->start); - __entry->src = src; - ), - TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf, - __entry->src) + TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry, + int src), + TP_ARGS(dd, is_entry, src), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __array(char, buf, 64) + __field(int, src) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd) + is_entry->is_name(__entry->buf, 64, + src - is_entry->start); + __entry->src = src; + ), + TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf, + __entry->src) ); /* @@ -1348,21 +1274,21 @@ TRACE_EVENT(hfi1_interrupt, #define MAX_MSG_LEN 512 DECLARE_EVENT_CLASS(hfi1_trace_template, - TP_PROTO(const char *function, struct va_format *vaf), - TP_ARGS(function, vaf), - 
TP_STRUCT__entry( - __string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) - ), - TP_fast_assign( - __assign_str(function, function); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); - ), - TP_printk("(%s) %s", - __get_str(function), - __get_str(msg)) + TP_PROTO(const char *function, struct va_format *vaf), + TP_ARGS(function, vaf), + TP_STRUCT__entry(__string(function, function) + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + TP_fast_assign(__assign_str(function, function); + WARN_ON_ONCE(vsnprintf + (__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= + MAX_MSG_LEN); + ), + TP_printk("(%s) %s", + __get_str(function), + __get_str(msg)) ); /* diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c index d7dfdd2..6c7f198 100644 --- a/drivers/staging/rdma/hfi1/twsi.c +++ b/drivers/staging/rdma/hfi1/twsi.c @@ -131,7 +131,7 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit) } if (rise_usec <= 0) dd_dev_err(dd, "SCL interface stuck low > %d uSec\n", - SCL_WAIT_USEC); + SCL_WAIT_USEC); } i2c_wait_for_writes(dd, target); } diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 8915401..e24cb62 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -318,7 +318,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) sl = ibp->sc_to_sl[sc5]; process_becn(ppd, sl, rlid, lqpn, rqpn, - IB_CC_SVCTYPE_UC); + IB_CC_SVCTYPE_UC); } if (bth1 & HFI1_FECN_SMASK) { diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index c3f0697..ea8f706 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -320,9 +320,10 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) || ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); - if (unlikely(!loopback && (lid == ppd->lid || - (lid == be16_to_cpu(IB_LID_PERMISSIVE) && - qp->ibqp.qp_type == IB_QPT_GSI)))) { + if (unlikely(!loopback && + (lid == ppd->lid || + (lid == be16_to_cpu(IB_LID_PERMISSIVE) && + qp->ibqp.qp_type == IB_QPT_GSI)))) { unsigned long flags; /* * If DMAs are in progress, we can't generate diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index b6d0926..ce94cbc 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -728,7 +728,7 @@ free_req: } static inline u32 compute_data_length(struct user_sdma_request *req, - struct user_sdma_txreq *tx) + struct user_sdma_txreq *tx) { /* * Determine the proper size of the packet data. 
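The hunks above are whitespace-only cleanups: wrapped function arguments are re-indented so they start under the opening parenthesis of the call, which is what checkpatch's "Alignment should match open parenthesis" check asks for (assumed here; the commit message for this particular patch is not part of this excerpt). A minimal sketch of the convention, reusing one of the dd_dev_info() calls touched in sdma_init() inside a hypothetical helper:

/* Sketch only -- assumes the hfi1 driver's hfi1.h for struct hfi1_devdata
 * and the dd_dev_info() logging macro; example_sdma_log() is hypothetical.
 */
static void example_sdma_log(struct hfi1_devdata *dd)
{
	/* before the cleanup: the continuation line carries an arbitrary indent */
	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
		dd->chip_sdma_mem_size);

	/* after the cleanup: the continuation line is aligned with the open parenthesis */
	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
		    dd->chip_sdma_mem_size);
}

The behaviour of every call in the hunks above is unchanged; only the indentation of the continuation lines differs.
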
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index c412f1c..275af19 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -970,7 +970,8 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd, /* The most likely matching pkey has index qp->s_pkey_index */ if (unlikely(!egress_pkey_matches_entry(pkey, - ppd->pkeys[qp->s_pkey_index]))) { + ppd->pkeys + [qp->s_pkey_index]))) { /* no match - try the entire table */ for (; i < MAX_PKEY_VALUES; i++) { if (egress_pkey_matches_entry(pkey, ppd->pkeys[i])) -- cgit v0.10.2 From e490974e675e8ddec795137c1db7f38e0308cbcd Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:22:00 -0800 Subject: staging/rdma/hfi1: Add braces on all arms of statement Add braces on all arms of statements to fix checkpatch check: CHECK: braces {} should be used on all arms of this statement Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index ce61883..0a77465 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13829,9 +13829,9 @@ int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey) int ret = 0; u64 reg; - if (ctxt < dd->num_rcv_contexts) + if (ctxt < dd->num_rcv_contexts) { rcd = dd->rcd[ctxt]; - else { + } else { ret = -EINVAL; goto done; } @@ -13857,9 +13857,9 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt) int ret = 0; u64 reg; - if (ctxt < dd->num_rcv_contexts) + if (ctxt < dd->num_rcv_contexts) { rcd = dd->rcd[ctxt]; - else { + } else { ret = -EINVAL; goto done; } diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 50a3b5a..090b701 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -302,9 +302,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, goto drop; /* Check for GRH */ - if (lnh == HFI1_LRH_BTH) + if (lnh == HFI1_LRH_BTH) { ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) { + } else if (lnh == HFI1_LRH_GRH) { u32 vtf; ohdr = &hdr->u.l.oth; @@ -314,9 +314,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; rcv_flags |= HFI1_HAS_GRH; - } else + } else { goto drop; - + } /* Get the destination QP number. 
*/ qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { @@ -618,14 +618,14 @@ static void __prescan_rxq(struct hfi1_packet *packet) hfi1_get_msgheader(dd, rhf_addr); lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == HFI1_LRH_BTH) + if (lnh == HFI1_LRH_BTH) { ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) { + } else if (lnh == HFI1_LRH_GRH) { ohdr = &hdr->u.l.oth; grh = &hdr->u.l.grh; - } else + } else { goto next; /* just in case */ - + } bth1 = be32_to_cpu(ohdr->bth[1]); is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 1bd1545..7846f31 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -399,8 +399,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, ret = sc_enable(sc); hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt->ctxt); - } else + } else { ret = sc_restart(sc); + } if (!ret) sc_return_credits(sc); break; @@ -1409,8 +1410,9 @@ static unsigned int poll_next(struct file *fp, set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags); hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt); pollflag = 0; - } else + } else { pollflag = POLLIN | POLLRDNORM; + } spin_unlock_irq(&dd->uctxt_lock); return pollflag; @@ -1488,8 +1490,9 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt, if (uctxt->rcvhdrtail_kvaddr) clear_rcvhdrtail(uctxt); rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB; - } else + } else { rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS; + } hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt); /* always; new head should be equal to new tail; see above */ bail: diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index a721059..cb75fc7 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1713,8 +1713,9 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[j].len)) { j++; offset = 0; - } else + } else { offset += new_size; + } } rcd->egrbufs.rcvtid_size = new_size; } diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 7619b75..305b7ae 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -135,15 +135,16 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) struct ib_ah *ah; ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid); - if (IS_ERR(ah)) + if (IS_ERR(ah)) { ret = PTR_ERR(ah); - else { + } else { send_buf->ah = ah; ibp->rvp.sm_ah = ibah_to_rvtah(ah); ret = 0; } - } else + } else { ret = -EINVAL; + } } else { send_buf->ah = &ibp->rvp.sm_ah->ibah; ret = 0; @@ -769,9 +770,9 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, p[i] = cpu_to_be16(q[i]); if (resp_len) *resp_len += size; - } else + } else { smp->status |= IB_SMP_INVALID_FIELD; - + } return reply((struct ib_mad_hdr *)smp); } @@ -977,15 +978,15 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, break; /* FALLTHROUGH */ case IB_PORT_DOWN: - if (phys_state == IB_PORTPHYSSTATE_NOP) + if (phys_state == IB_PORTPHYSSTATE_NOP) { link_state = HLS_DN_DOWNDEF; - else if (phys_state == IB_PORTPHYSSTATE_POLLING) { + } else if (phys_state == IB_PORTPHYSSTATE_POLLING) { link_state = HLS_DN_POLL; set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE, 0, OPA_LINKDOWN_REASON_FM_BOUNCE); - } else if (phys_state == IB_PORTPHYSSTATE_DISABLED) + } else if (phys_state == IB_PORTPHYSSTATE_DISABLED) { link_state = 
HLS_DN_DISABLE; - else { + } else { pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n", phys_state); smp->status |= IB_SMP_INVALID_FIELD; @@ -1193,9 +1194,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, set_link_width_downgrade_enabled(ppd, lwe); call_link_downgrade_policy = 1; } - } else + } else { smp->status |= IB_SMP_INVALID_FIELD; - + } lse = be16_to_cpu(pi->link_speed.enabled); if (lse) { if (lse & be16_to_cpu(pi->link_speed.supported)) diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index cbd61cf..2b0281c 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -123,8 +123,9 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } else + } else { ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + } if (ret) { hfi1_early_err(&pdev->dev, "Unable to set DMA consistent mask: %d\n", ret); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 9024673..4f40c98 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -326,12 +326,15 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) x = (min + max) / 2; if (credit_table[x] == credits) break; - if (credit_table[x] > credits) + if (credit_table[x] > credits) { max = x; - else if (min == x) - break; - else - min = x; + } else { + if (min == x) { + break; + } else { + min = x; + } + } } aeth |= x << HFI1_AETH_CREDIT_SHIFT; } diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 28ff638..70a6e63 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -505,9 +505,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) len = pmtu; break; } - if (wqe->wr.opcode == IB_WR_SEND) + if (wqe->wr.opcode == IB_WR_SEND) { qp->s_state = OP(SEND_ONLY); - else { + } else { qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ ohdr->u.imm_data = wqe->wr.ex.imm_data; @@ -542,9 +542,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) len = pmtu; break; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) { qp->s_state = OP(RDMA_WRITE_ONLY); - else { + } else { qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); /* Immediate data comes after RETH */ @@ -672,9 +672,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) middle = HFI1_CAP_IS_KSET(SDMA_AHG); break; } - if (wqe->wr.opcode == IB_WR_SEND) + if (wqe->wr.opcode == IB_WR_SEND) { qp->s_state = OP(SEND_LAST); - else { + } else { qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ ohdr->u.imm_data = wqe->wr.ex.imm_data; @@ -712,9 +712,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) middle = HFI1_CAP_IS_KSET(SDMA_AHG); break; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) { qp->s_state = OP(RDMA_WRITE_LAST); - else { + } else { qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ ohdr->u.imm_data = wqe->wr.ex.imm_data; @@ -1013,10 +1013,12 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); return; - } else /* need to handle delayed completion */ + } else { /* need to handle delayed completion */ return; - } else + } + } else { qp->s_retry--; + } ibp = 
to_iport(qp->ibqp.device, qp->port_num); if (wqe->wr.opcode == IB_WR_RDMA_READ) @@ -1612,8 +1614,9 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, val = ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]); - } else + } else { val = 0; + } if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 74086ea..f2f76b3 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -2009,8 +2009,9 @@ static int sdma_check_progress( ret = wait->sleep(sde, wait, tx, seq); if (ret == -EAGAIN) sde->desc_avail = sdma_descq_freecnt(sde); - } else + } else { ret = -EBUSY; + } return ret; } diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c index 6c7f198..1cc0748 100644 --- a/drivers/staging/rdma/hfi1/twsi.c +++ b/drivers/staging/rdma/hfi1/twsi.c @@ -119,9 +119,9 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit) * Allow for slow slaves by simple * delay for falling edge, sampling on rise. */ - if (!bit) + if (!bit) { udelay(2); - else { + } else { int rise_usec; for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) { diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index e24cb62..81b2dc7 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -139,9 +139,9 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) len = pmtu; break; } - if (wqe->wr.opcode == IB_WR_SEND) + if (wqe->wr.opcode == IB_WR_SEND) { qp->s_state = OP(SEND_ONLY); - else { + } else { qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ @@ -168,9 +168,9 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) len = pmtu; break; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) { qp->s_state = OP(RDMA_WRITE_ONLY); - else { + } else { qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); /* Immediate data comes after the RETH */ @@ -199,9 +199,9 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) middle = HFI1_CAP_IS_KSET(SDMA_AHG); break; } - if (wqe->wr.opcode == IB_WR_SEND) + if (wqe->wr.opcode == IB_WR_SEND) { qp->s_state = OP(SEND_LAST); - else { + } else { qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ ohdr->u.imm_data = wqe->wr.ex.imm_data; @@ -224,9 +224,9 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) middle = HFI1_CAP_IS_KSET(SDMA_AHG); break; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) { qp->s_state = OP(RDMA_WRITE_LAST); - else { + } else { qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ @@ -353,8 +353,9 @@ inv: qp->r_state == OP(SEND_MIDDLE)) { set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; - } else + } else { rvt_put_ss(&qp->r_sge); + } qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -410,9 +411,9 @@ inv: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): send_first: - if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { qp->r_sge = qp->s_rdma_read_sge; - else { + } else { ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto op_err; @@ -523,9 +524,9 @@ rdma_first: qp->r_sge.sge.length = 0; qp->r_sge.sge.sge_length = 0; } - if (opcode == OP(RDMA_WRITE_ONLY)) + if (opcode == OP(RDMA_WRITE_ONLY)) { goto 
rdma_last; - else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) { + } else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) { wc.ex.imm_data = ohdr->u.rc.imm_data; goto rdma_last_imm; } @@ -555,9 +556,9 @@ rdma_last_imm: tlen -= (hdrsize + pad + 4); if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; - if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { rvt_put_ss(&qp->s_rdma_read_sge); - else { + } else { ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto op_err; diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index ea8f706..65157a4 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -163,9 +163,9 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) /* * Get the next work request entry to find where to put the data. */ - if (qp->r_flags & RVT_R_REUSE_SGE) + if (qp->r_flags & RVT_R_REUSE_SGE) { qp->r_flags &= ~RVT_R_REUSE_SGE; - else { + } else { int ret; ret = hfi1_rvt_get_rwqe(qp, 0); @@ -190,8 +190,9 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) hfi1_copy_sge(&qp->r_sge, &ah_attr->grh, sizeof(struct ib_grh), 1, 0); wc.wc_flags |= IB_WC_GRH; - } else + } else { hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); + } ssge.sg_list = swqe->sg_list + 1; ssge.sge = *swqe->sg_list; ssge.num_sge = swqe->wr.num_sge; @@ -383,8 +384,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords++; ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; - } else + } else { bth0 = IB_OPCODE_UD_SEND_ONLY << 24; + } sc5 = ibp->sl_to_sc[ah_attr->sl]; lrh0 |= (ah_attr->sl & 0xf) << 4; if (qp->ibqp.qp_type == IB_QPT_SMI) { @@ -820,8 +822,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; - } else + } else { goto drop; + } /* * A GRH is expected to precede the data even if not @@ -832,9 +835,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) /* * Get the next work request entry to find where to put the data. */ - if (qp->r_flags & RVT_R_REUSE_SGE) + if (qp->r_flags & RVT_R_REUSE_SGE) { qp->r_flags &= ~RVT_R_REUSE_SGE; - else { + } else { int ret; ret = hfi1_rvt_get_rwqe(qp, 0); @@ -857,8 +860,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, sizeof(struct ib_grh), 1, 0); wc.wc_flags |= IB_WC_GRH; - } else + } else { hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); + } hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1, 0); rvt_put_ss(&qp->r_sge); @@ -884,8 +888,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } } wc.pkey_index = (unsigned)mgmt_pkey_idx; - } else + } else { wc.pkey_index = 0; + } wc.slid = be16_to_cpu(hdr->lrh[3]); sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index ce94cbc..7287307 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -765,8 +765,9 @@ static inline u32 compute_data_length(struct user_sdma_request *req, * remaining. 
*/ len = min(len, req->data_len - req->sent); - } else + } else { len = min(req->data_len - req->sent, (u32)req->info.fragsize); + } SDMA_DBG(req, "Data Length = %u", len); return len; } @@ -1337,8 +1338,9 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, INTR) >> 16); val &= cpu_to_le16(~(1U << 13)); AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); - } else + } else { AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val); + } } trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 275af19..0e650b4 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -400,9 +400,9 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) /* Check for GRH */ lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == HFI1_LRH_BTH) + if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) { + } else if (lnh == HFI1_LRH_GRH) { u32 vtf; packet->ohdr = &hdr->u.l.oth; @@ -412,8 +412,9 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; packet->rcv_flags |= HFI1_HAS_GRH; - } else + } else { goto drop; + } trace_input_ibhdr(rcd->dd, hdr); @@ -528,9 +529,9 @@ static void verbs_sdma_complete( struct rvt_qp *qp = tx->qp; spin_lock(&qp->s_lock); - if (tx->wqe) + if (tx->wqe) { hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); - else if (qp->ibqp.qp_type == IB_QPT_RC) { + } else if (qp->ibqp.qp_type == IB_QPT_RC) { struct hfi1_ib_header *hdr; hdr = &tx->phdr.hdr; -- cgit v0.10.2 From edddfca00eecd0949a9adccf8dd490478f641cbc Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:22:09 -0800 Subject: staging/rdma/hfi1: Remove else after break Remove else after break to fix checkpatch warning: WARNING: else is not generally useful after a break or return Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 4f40c98..a17cb73 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -329,11 +329,9 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) if (credit_table[x] > credits) { max = x; } else { - if (min == x) { + if (min == x) break; - } else { - min = x; - } + min = x; } } aeth |= x << HFI1_AETH_CREDIT_SHIFT; -- cgit v0.10.2 From 05d6ac1d8268915593480a34926f386970a9d720 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sun, 14 Feb 2016 20:22:17 -0800 Subject: staging/rdma/hfi1: Fix header Fix the header by moving the copyright notice out of the license text and to the top of the header. Also, update the copyright date. Reviewed-by: Dennis Dalessandro Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/staging/rdma/hfi1/affinity.c index 59b2972..2cb8ca7 100644 --- a/drivers/staging/rdma/hfi1/affinity.c +++ b/drivers/staging/rdma/hfi1/affinity.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. 
@@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/staging/rdma/hfi1/affinity.h index 2bdac96..b287e49 100644 --- a/drivers/staging/rdma/hfi1/affinity.h +++ b/drivers/staging/rdma/hfi1/affinity.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/staging/rdma/hfi1/aspm.h index 67fce1d..3aac802 100644 --- a/drivers/staging/rdma/hfi1/aspm.h +++ b/drivers/staging/rdma/hfi1/aspm.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 0a77465..93bf465 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 0b7055b..8468139 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -1,14 +1,13 @@ #ifndef _CHIP_H #define _CHIP_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. 
- * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h index 23898eb..770f05c 100644 --- a/drivers/staging/rdma/hfi1/chip_registers.h +++ b/drivers/staging/rdma/hfi1/chip_registers.h @@ -2,14 +2,13 @@ #define DEF_CHIP_REG /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -21,8 +20,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h index dcf8edf..e9b6bb3 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/staging/rdma/hfi1/common.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index 0b0fd8a..4fd58e3 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -1,13 +1,12 @@ #ifdef CONFIG_DEBUG_FS /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -19,8 +18,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/staging/rdma/hfi1/debugfs.h index 92d6fe1..b6fb681 100644 --- a/drivers/staging/rdma/hfi1/debugfs.h +++ b/drivers/staging/rdma/hfi1/debugfs.h @@ -1,14 +1,13 @@ #ifndef _HFI1_DEBUGFS_H #define _HFI1_DEBUGFS_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c index 58472e5..c05c39d 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/staging/rdma/hfi1/device.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/staging/rdma/hfi1/device.h index 2850ff7..5bb3e83 100644 --- a/drivers/staging/rdma/hfi1/device.h +++ b/drivers/staging/rdma/hfi1/device.h @@ -1,14 +1,13 @@ #ifndef _HFI1_DEVICE_H #define _HFI1_DEVICE_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index b8faee0..6546e91 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/staging/rdma/hfi1/dma.c index afe572d..7e8dab8 100644 --- a/drivers/staging/rdma/hfi1/dma.c +++ b/drivers/staging/rdma/hfi1/dma.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. 
* * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 090b701..76ed5f7 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/staging/rdma/hfi1/efivar.c index 7dc5bae..5fe3924 100644 --- a/drivers/staging/rdma/hfi1/efivar.c +++ b/drivers/staging/rdma/hfi1/efivar.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/staging/rdma/hfi1/efivar.h index 0707062..94e9e70 100644 --- a/drivers/staging/rdma/hfi1/efivar.h +++ b/drivers/staging/rdma/hfi1/efivar.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. 
- * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c index f36d06b..87114af 100644 --- a/drivers/staging/rdma/hfi1/eprom.c +++ b/drivers/staging/rdma/hfi1/eprom.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/staging/rdma/hfi1/eprom.h index 5a61ba3..d41f0b1 100644 --- a/drivers/staging/rdma/hfi1/eprom.h +++ b/drivers/staging/rdma/hfi1/eprom.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 7846f31..e4490ae 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 52a3e8c..d5befd1 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. 
@@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 07df515..9215482 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1,14 +1,13 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index cb75fc7..2def538 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c index 46eeeca..65348d1 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/staging/rdma/hfi1/intr.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h index 2cb3f04..2ec6ef3 100644 --- a/drivers/staging/rdma/hfi1/iowait.h +++ b/drivers/staging/rdma/hfi1/iowait.h @@ -1,14 +1,13 @@ #ifndef _HFI1_IOWAIT_H #define _HFI1_IOWAIT_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. 
- * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 305b7ae..78931fc 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/staging/rdma/hfi1/mad.h index 9ebaaf9..55ee086 100644 --- a/drivers/staging/rdma/hfi1/mad.h +++ b/drivers/staging/rdma/hfi1/mad.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/staging/rdma/hfi1/opa_compat.h index 30f7707..6ef3c1c 100644 --- a/drivers/staging/rdma/hfi1/opa_compat.h +++ b/drivers/staging/rdma/hfi1/opa_compat.h @@ -1,14 +1,13 @@ #ifndef _LINUX_H #define _LINUX_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 2b0281c..7855962 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
* * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index a483c0a..859cb46 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h index d80909a..0026976 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/staging/rdma/hfi1/pio.h @@ -1,14 +1,13 @@ #ifndef _PIO_H #define _PIO_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/staging/rdma/hfi1/pio_copy.c index 998e7bc..228e9fb 100644 --- a/drivers/staging/rdma/hfi1/pio_copy.c +++ b/drivers/staging/rdma/hfi1/pio_copy.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index 2f07bec..4777414 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/staging/rdma/hfi1/platform.h index 1f41bdc..19620cf 100644 --- a/drivers/staging/rdma/hfi1/platform.h +++ b/drivers/staging/rdma/hfi1/platform.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index a17cb73..9e831a1 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 7b1c57e..e7bc8d6 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -1,14 +1,13 @@ #ifndef _QP_H #define _QP_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index c5e04b0..e38a0eb 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h index c391750..2ad5980 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/staging/rdma/hfi1/qsfp.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 70a6e63..8caad18 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index e2c4f82..5d84981 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index f2f76b3..d894f43 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 5aec18b..8f50c99 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -1,14 +1,13 @@ #ifndef _HFI1_SDMA_H #define _HFI1_SDMA_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c index 3e3f180..c7f1271 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/staging/rdma/hfi1/sysfs.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c index 99fd017..6821d7c 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/staging/rdma/hfi1/trace.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index dfa9967..b8b4416 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. 
* * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c index 1cc0748..e82e52a 100644 --- a/drivers/staging/rdma/hfi1/twsi.c +++ b/drivers/staging/rdma/hfi1/twsi.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/staging/rdma/hfi1/twsi.h index 0722ac8..5b8a5b5 100644 --- a/drivers/staging/rdma/hfi1/twsi.h +++ b/drivers/staging/rdma/hfi1/twsi.h @@ -1,14 +1,13 @@ #ifndef _TWSI_H #define _TWSI_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 81b2dc7..5ba29af 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. 
- * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index 65157a4..ae8a70f 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 36b61b5..fccae50 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/staging/rdma/hfi1/user_exp_rcv.h index 28ef98a..9bc8d9f 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.h +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.h @@ -1,14 +1,13 @@ #ifndef _HFI1_USER_EXP_RCV_H #define _HFI1_USER_EXP_RCV_H /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -20,8 +19,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c index 1854c0c..3bf8108 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/staging/rdma/hfi1/user_pages.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. 
- * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 7287307..14fe079 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h index 7ef31a6..e0d0fe0 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.h +++ b/drivers/staging/rdma/hfi1/user_sdma.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015, 2016 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015, 2016 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 0e650b4..220bdb0 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index dc623c6..a85e6bc 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
* * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: -- cgit v0.10.2 From 6b5c5213e57453c228f7695d5d889aa4c84272c3 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 18 Feb 2016 11:11:59 -0800 Subject: staging/rdma/hfi1: fix 0-day syntax error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting CONFIG_HFI1_DEBUG_SDMA_ORDER causes a syntax error: sdma.c: In function ‘complete_tx’: sdma.c:370: error: ‘txp’ undeclared (first use in this function) sdma.c:370: error: (Each undeclared identifier is reported only once sdma.c:370: error: for each function it appears in.) Adjust code under ifdef to reference the tx properly. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index d894f43..e29b5d3 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -367,10 +367,10 @@ static inline void complete_tx(struct sdma_engine *sde, callback_t complete = tx->complete; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER - trace_hfi1_sdma_out_sn(sde, txp->sn); - if (WARN_ON_ONCE(sde->head_sn != txp->sn)) + trace_hfi1_sdma_out_sn(sde, tx->sn); + if (WARN_ON_ONCE(sde->head_sn != tx->sn)) dd_dev_err(sde->dd, "expected %llu got %llu\n", - sde->head_sn, txp->sn); + sde->head_sn, tx->sn); sde->head_sn++; #endif sdma_txclean(sde->dd, tx); -- cgit v0.10.2 From 4c9e7aacb6a6334168a81b83819fb5cb088d2fb3 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 18 Feb 2016 11:12:08 -0800 Subject: staging/rdma/hfi1: Fix xmit discard error weight Count only the errors that apply to xmit discards. Update the comment to better explain the limitations of the count. Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 93bf465..6e44d52 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -5566,12 +5566,28 @@ static void handle_send_egress_err_info(struct hfi1_devdata *dd, int weight, i; /* - * Count all, in case multiple bits are set. Reminder: - * since there is only one info register for many sources, - * these may be attributed to the wrong VL if they occur - * too close together. + * Count all applicable bits as individual errors and + * attribute them to the packet that triggered this handler. + * This may not be completely accurate due to limitations + * on the available hardware error information. There is + * a single information register and any number of error + * packets may have occurred and contributed to it before + * this routine is called. This means that: + * a) If multiple packets with the same error occur before + * this routine is called, earlier packets are missed. + * There is only a single bit for each error type. + * b) Errors may not be attributed to the correct VL. 
+ * The driver is attributing all bits in the info register + * to the packet that triggered this call, but bits + * could be an accumulation of different packets with + * different VLs. + * c) A single error packet may have multiple counts attached + * to it. There is no way for the driver to know if + * multiple bits set in the info register are due to a + * single packet or multiple packets. The driver assumes + * multiple packets. */ - weight = hweight64(info); + weight = hweight64(info & PORT_DISCARD_EGRESS_ERRS); for (i = 0; i < weight; i++) { __count_port_discards(ppd); if (vl >= 0 && vl < TXE_NUM_DATA_VL) -- cgit v0.10.2 From e8aa284ba0cd2a1b6bfb3181a5b3b7f0bdefbe1a Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 18 Feb 2016 11:12:16 -0800 Subject: staging/rdma/hfi1: Cleanup comments and logs in PHY code This is a set of minor fixes including comment and log message cleanups and improvements to the PHY layer code. Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 6e44d52..483b37a 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -5946,10 +5946,10 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); if (reg & QSFP_HFI0_MODPRST_N) { - dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n", - __func__); - if (!qsfp_mod_present(ppd)) { + dd_dev_info(dd, "%s: QSFP module removed\n", + __func__); + ppd->driver_link_ready = 0; /* * Cable removed, reset all our information about the @@ -5989,6 +5989,9 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) queue_work(ppd->hfi1_wq, &ppd->link_down_work); } } else { + dd_dev_info(dd, "%s: QSFP module inserted\n", + __func__); + spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.cache_valid = 0; ppd->qsfp_info.cache_refresh_required = 1; @@ -6009,7 +6012,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) } if (reg & QSFP_HFI0_INT_N) { - dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n", + dd_dev_info(dd, "%s: Interrupt received from QSFP module\n", __func__); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.check_interrupt_flags = 1; diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index e38a0eb..07330b0 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -187,7 +187,7 @@ done: /* * Write page n, offset m of QSFP memory as defined by SFF 8636 - * in the cache by writing @addr = ((256 * n) + m) + * by writing @addr = ((256 * n) + m) */ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -253,7 +253,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, /* * Access page n, offset m of QSFP memory as defined by SFF 8636 - * in the cache by reading @addr = ((256 * n) + m) + * by reading @addr = ((256 * n) + m) */ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) -- cgit v0.10.2 From ed6f653fe430ed4912aebec10a1b9d57813fe44c Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 18 Feb 2016 11:12:25 -0800 Subject: staging/rdma/hfi1: Fix debugfs access race Debugfs access races with the driver being ready. 
Make sure the driver is ready before debugfs files appear and debufs files are gone before the driver starts tearing down. Reviewed-by: Mike Marciniszyn Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 2def538..371ed29 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -983,7 +983,6 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) list_del(&dd->list); spin_unlock_irqrestore(&hfi1_devs_lock, flags); free_platform_config(dd); - hfi1_dbg_ibdev_exit(&dd->verbs_dev); rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); @@ -1088,7 +1087,6 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) &pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } - hfi1_dbg_ibdev_init(&dd->verbs_dev); return dd; bail: @@ -1445,8 +1443,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * we still create devices, so diags, etc. can be used * to determine cause of problem. */ - if (!initfail && !ret) + if (!initfail && !ret) { dd->flags |= HFI1_INITTED; + /* create debufs files after init and ib register */ + hfi1_dbg_ibdev_init(&dd->verbs_dev); + } j = hfi1_device_create(dd); if (j) @@ -1487,6 +1488,8 @@ static void remove_one(struct pci_dev *pdev) { struct hfi1_devdata *dd = pci_get_drvdata(pdev); + /* close debugfs files before ib unregister */ + hfi1_dbg_ibdev_exit(&dd->verbs_dev); /* unregister from IB core */ hfi1_unregister_ib_device(dd); -- cgit v0.10.2 From 354d9c952d9db01d561abd55fdfa09ccc67039f9 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 18 Feb 2016 11:12:34 -0800 Subject: staging/rdma/hfi1: Disclose more information when i2c fails Improve logging messages when there are i2c failures. Clean i2c read error handling. Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index 07330b0..7e76b93 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -102,7 +102,8 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, ret = hfi1_twsi_reset(ppd->dd, target); if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, - "I2C write interface reset failed\n"); + "I2C chain %d write interface reset failed\n", + target); goto done; } @@ -121,15 +122,14 @@ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, { struct hfi1_devdata *dd = ppd->dd; int ret, cnt, pass = 0; - int stuck = 0; - u8 *buff = bp; + int orig_offset = offset; cnt = 0; while (cnt < len) { int rlen = len - cnt; ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset, - buff + cnt, rlen); + bp + cnt, rlen); /* Some QSFP's fail first try. 
Retry as experiment */ if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY) continue; @@ -145,14 +145,11 @@ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, ret = cnt; exit: - if (stuck) - dd_dev_err(dd, "I2C interface bus stuck non-idle\n"); - - if (pass >= I2C_MAX_RETRY && ret) + if (ret < 0) { hfi1_dev_porterr(dd, ppd->port, - "I2C failed even retrying\n"); - else if (pass) - hfi1_dev_porterr(dd, ppd->port, "I2C retries: %d\n", pass); + "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n", + target, i2c_addr, orig_offset, len); + } /* Must wait min 20us between qsfp i2c transactions */ udelay(20); @@ -174,7 +171,8 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, ret = hfi1_twsi_reset(ppd->dd, target); if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, - "I2C read interface reset failed\n"); + "I2C chain %d read interface reset failed\n", + target); goto done; } @@ -206,7 +204,8 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ret = hfi1_twsi_reset(ppd->dd, target); if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, - "QSFP write interface reset failed\n"); + "QSFP chain %d write interface reset failed\n", + target); mutex_unlock(&ppd->dd->qsfp_i2c_mutex); return ret; } @@ -221,10 +220,9 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); if (ret != 1) { - hfi1_dev_porterr( - ppd->dd, - ppd->port, - "can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret); + hfi1_dev_porterr(ppd->dd, ppd->port, + "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n", + target, ret); ret = -EIO; break; } @@ -272,7 +270,8 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ret = hfi1_twsi_reset(ppd->dd, target); if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, - "QSFP read interface reset failed\n"); + "QSFP chain %d read interface reset failed\n", + target); mutex_unlock(&ppd->dd->qsfp_i2c_mutex); return ret; } @@ -286,10 +285,9 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); if (ret != 1) { - hfi1_dev_porterr( - ppd->dd, - ppd->port, - "can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret); + hfi1_dev_porterr(ppd->dd, ppd->port, + "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n", + target, ret); ret = -EIO; break; } -- cgit v0.10.2 From 9debaaecb9070c05bbc3b5d2257d6aea416ccab6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 18 Feb 2016 11:12:42 -0800 Subject: IB/rdamvt: fix cross build with rdmavt The new check routine causes a larger than supported frame size on s390. Changing the check routine to noinline fixes the issue. 
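[ Illustration, not part of the patch: the kernel's noinline annotation expands to the compiler attribute used below. Keeping the helper out of line means its locals get their own stack frame instead of being folded into the caller's, which is what avoids the s390 frame-size warning. This is a plain userspace sketch with hypothetical names (big_helper), not the rdmavt code. ]

#include <stdio.h>
#include <string.h>

static __attribute__((noinline)) int big_helper(int verb)
{
	char scratch[1024];	/* the large frame stays in this function */

	memset(scratch, 0, sizeof(scratch));
	return scratch[verb & 1023];
}

int main(void)
{
	/* the caller's frame stays small because big_helper is never inlined */
	printf("%d\n", big_helper(3));
	return 0;
}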
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 9566a92..6caf527 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -383,7 +383,7 @@ static inline int check_driver_override(struct rvt_dev_info *rdi, return 1; } -static int check_support(struct rvt_dev_info *rdi, int verb) +static noinline int check_support(struct rvt_dev_info *rdi, int verb) { switch (verb) { case MISC: -- cgit v0.10.2 From 7b47622d784311bff8218d03754fbf20529c1a71 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 18 Feb 2016 11:12:51 -0800 Subject: staging/rdma/hfi1: Guard i2c access against cp An attempt to cp or cat /sys/kernel/debug/hfi1/hfi1_0/i2c1 produces this message: hfi1 0000:81:00.0: hfi1_0: IB0:1 I2C failed even retrying Fix the issue by explicitly rejecting a simple cat/cp with an -EINVAL error return. Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index 4fd58e3..07c16b3 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -446,6 +446,16 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, rcu_read_lock(); ppd = private2ppd(file); + /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ + i2c_addr = (*ppos >> 16) & 0xffff; + offset = *ppos & 0xffff; + + /* explicitly reject invalid address 0 to catch cp and cat */ + if (i2c_addr == 0) { + ret = -EINVAL; + goto _return; + } + buff = kmalloc(count, GFP_KERNEL); if (!buff) { ret = -ENOMEM; @@ -458,10 +468,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, goto _free; } - /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ - i2c_addr = (*ppos >> 16) & 0xffff; - offset = *ppos & 0xffff; - total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count); if (total_written < 0) { ret = total_written; @@ -507,16 +513,22 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, rcu_read_lock(); ppd = private2ppd(file); + /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ + i2c_addr = (*ppos >> 16) & 0xffff; + offset = *ppos & 0xffff; + + /* explicitly reject invalid address 0 to catch cp and cat */ + if (i2c_addr == 0) { + ret = -EINVAL; + goto _return; + } + buff = kmalloc(count, GFP_KERNEL); if (!buff) { ret = -ENOMEM; goto _return; } - /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ - i2c_addr = (*ppos >> 16) & 0xffff; - offset = *ppos & 0xffff; - total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count); if (total_read < 0) { ret = total_read; -- cgit v0.10.2 From 582e05c3deeaf56ed04df62ad9f1fa6e88199bd9 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Thu, 18 Feb 2016 11:13:01 -0800 Subject: staging/rdma/hfi1: Fix counter read for cp A cp or cat of /sys/kernel/debug/hfi1/hfi1_0/port1counters produces the following message: hfi1 0000:81:00.0: hfi1_0: index not supported hfi1 0000:81:00.0: hfi1_0: read_cntrs does not support indexing Fix by removing the file position logic and the associated messages and make the file positioning the responsibility of the caller. The port counter read function argument is changed to the per port data structure since the counters are relative to the port and not the device. 
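[ For context: simple_read_from_buffer() is what now absorbs the file offset, so the counter provider never has to reject a non-zero position. A rough userspace model of its positioning behaviour is sketched below; the real helper also validates *ppos and uses copy_to_user(), and loff_t is modelled here as a plain long. ]

#include <string.h>
#include <sys/types.h>

ssize_t read_from_buffer(void *to, size_t count, long *ppos,
			 const void *from, size_t available)
{
	size_t pos = (size_t)*ppos;
	size_t n;

	if (pos >= available)
		return 0;		/* EOF: lets a sequential cat/cp terminate cleanly */
	n = available - pos;
	if (n > count)
		n = count;
	memcpy(to, (const char *)from + pos, n);
	*ppos += (long)n;		/* the caller's offset advances here, not in the provider */
	return (ssize_t)n;
}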
Reviewed-by: Sebastian Sanchez Signed-off-by: Dean Luick Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 483b37a..fe73ebf 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -11407,28 +11407,19 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) dd->rcvhdrtail_dummy_physaddr); } -u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep, - u64 **cntrp) +u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) { int ret; u64 val = 0; if (namep) { ret = dd->cntrnameslen; - if (pos != 0) { - dd_dev_err(dd, "read_cntrs does not support indexing"); - return 0; - } *namep = dd->cntrnames; } else { const struct cntr_entry *entry; int i, j; ret = (dd->ndevcntrs) * sizeof(u64); - if (pos != 0) { - dd_dev_err(dd, "read_cntrs does not support indexing"); - return 0; - } /* Get the start of the block of counters */ *cntrp = dd->cntrs; @@ -11487,30 +11478,19 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep, /* * Used by sysfs to create files for hfi stats to read */ -u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port, - char **namep, u64 **cntrp) +u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp) { int ret; u64 val = 0; if (namep) { - ret = dd->portcntrnameslen; - if (pos != 0) { - dd_dev_err(dd, "index not supported"); - return 0; - } - *namep = dd->portcntrnames; + ret = ppd->dd->portcntrnameslen; + *namep = ppd->dd->portcntrnames; } else { const struct cntr_entry *entry; - struct hfi1_pportdata *ppd; int i, j; - ret = (dd->nportcntrs) * sizeof(u64); - if (pos != 0) { - dd_dev_err(dd, "indexing not supported"); - return 0; - } - ppd = (struct hfi1_pportdata *)(dd + 1 + port); + ret = ppd->dd->nportcntrs * sizeof(u64); *cntrp = ppd->cntrs; for (i = 0; i < PORT_CNTR_LAST; i++) { diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 8468139..e9a41ed 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -1297,10 +1297,8 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt); -u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep, - u64 **cntrp); -u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port, - char **namep, u64 **cntrp); +u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); +u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index 07c16b3..99845bc 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -336,7 +336,7 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf, rcu_read_lock(); dd = private2dd(file); - avail = hfi1_read_cntrs(dd, *ppos, NULL, &counters); + avail = hfi1_read_cntrs(dd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); rcu_read_unlock(); return rval; @@ -353,7 +353,7 @@ static ssize_t dev_names_read(struct file *file, char __user *buf, rcu_read_lock(); dd = private2dd(file); - avail = 
hfi1_read_cntrs(dd, *ppos, &names, NULL); + avail = hfi1_read_cntrs(dd, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); rcu_read_unlock(); return rval; @@ -380,8 +380,7 @@ static ssize_t portnames_read(struct file *file, char __user *buf, rcu_read_lock(); dd = private2dd(file); - /* port number n/a here since names are constant */ - avail = hfi1_read_portcntrs(dd, *ppos, 0, &names, NULL); + avail = hfi1_read_portcntrs(dd->pport, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); rcu_read_unlock(); return rval; @@ -393,14 +392,12 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf, { u64 *counters; size_t avail; - struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; ssize_t rval; rcu_read_lock(); ppd = private2ppd(file); - dd = ppd->dd; - avail = hfi1_read_portcntrs(dd, *ppos, ppd->port - 1, NULL, &counters); + avail = hfi1_read_portcntrs(ppd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); rcu_read_unlock(); return rval; -- cgit v0.10.2 From ade302405f333af834c5a272f905000810adf474 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Fri, 26 Feb 2016 13:33:08 -0800 Subject: IB/rdmavt: Check lkey_table_size value before use The lkey_table_size driver specific parameter value is used before its value is sanity checked and restricted to RVT_MAX_LKEY_TABLE_BITS. This causes a vmalloc allocation failure for large values. Fix this by moving the value check before the first usage of the value. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 4d5ef73..0ff765b 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -76,8 +76,6 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) spin_lock_init(&rdi->lkey_table.lock); - rdi->lkey_table.max = 1 << lkey_table_size; - /* ensure generation is at least 4 bits */ if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) { rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n", @@ -85,6 +83,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS; lkey_table_size = rdi->dparms.lkey_table_size; } + rdi->lkey_table.max = 1 << lkey_table_size; lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); rdi->lkey_table.table = (struct rvt_mregion __rcu **) vmalloc_node(lk_tab_size, rdi->dparms.node); -- cgit v0.10.2 From aa0ad411e51763f95afdae11d6ee684915faecef Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Fri, 26 Feb 2016 13:33:13 -0800 Subject: staging/rdma/hfi1: Fix header size calculation for RC/UC QPs with GRH enabled There is a header size counter in both the QP struture and the txreq structure. The counter in the txreq structure is not updated properly for RC and UC queue pairs with GRH enabled, and thus causing SDMA send to fail. This patch fixes the RC and UC path. 
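[ Illustration of the ordering fix, using hypothetical stand-in types rather than the hfi1 structures: the per-request dword count must be captured after the header builder runs, because building a header with a GRH grows the word count. ]

#include <stdio.h>

struct fake_qp  { unsigned int s_hdrwords; };
struct fake_txq { unsigned int hdr_dwords; };

static void build_header(struct fake_qp *qp, int have_grh)
{
	if (have_grh)
		qp->s_hdrwords += 10;		/* a GRH adds 40 bytes == 10 dwords */
}

static void make_request(struct fake_qp *qp, struct fake_txq *tx, int have_grh)
{
	qp->s_hdrwords = 7;			/* illustrative base header words */
	build_header(qp, have_grh);		/* may grow s_hdrwords */
	tx->hdr_dwords = qp->s_hdrwords + 2;	/* capture AFTER building; +2 is the PBC */
}

int main(void)
{
	struct fake_qp qp;
	struct fake_txq tx;

	make_request(&qp, &tx, 1);
	printf("hdr_dwords = %u\n", tx.hdr_dwords);	/* 19: GRH words are included */
	return 0;
}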
Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Jianxin Xiong Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 8caad18..1ce0e08 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -358,11 +358,11 @@ normal: } qp->s_rdma_ack_cnt++; qp->s_hdrwords = hwords; - /* pbc */ - ps->s_txreq->hdr_dwords = hwords + 2; ps->s_txreq->sde = priv->s_sde; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); + /* pbc */ + ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; return 1; bail: @@ -763,8 +763,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } qp->s_len -= len; qp->s_hdrwords = hwords; - /* pbc */ - ps->s_txreq->hdr_dwords = hwords + 2; ps->s_txreq->sde = priv->s_sde; qp->s_cur_sge = ss; qp->s_cur_size = len; @@ -775,6 +773,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) bth2, middle, ps); + /* pbc */ + ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; return 1; done_free_tx: diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 5ba29af..df773d4 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -239,13 +239,13 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } qp->s_len -= len; qp->s_hdrwords = hwords; - /* pbc */ - ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; ps->s_txreq->sde = priv->s_sde; qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), mask_psn(qp->s_psn++), middle, ps); + /* pbc */ + ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; return 1; done_free_tx: -- cgit v0.10.2 From bf400235f392eabf60c865c95da823727cb00def Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Fri, 26 Feb 2016 13:33:18 -0800 Subject: staging/rdma/hfi1: Avoid using upstream component if it is not accessible When the hfi1 device is assigned to a VM (eg KVM), the hfi1 driver has no access to the upstream component and therefore cannot use it to perform some operations, such as secondary bus reset. As a result, the hfi1 driver cannot perform the pcie Gen3 transition. Instead, those operation should be done in the host environment, preferrably done during the Option ROM initialization. Similarly, the hfi1 driver cannot support ASPM and tune the pcie capability under this circumstance. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/staging/rdma/hfi1/aspm.h index 3aac802..0d58fe3 100644 --- a/drivers/staging/rdma/hfi1/aspm.h +++ b/drivers/staging/rdma/hfi1/aspm.h @@ -72,6 +72,13 @@ static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd) struct pci_dev *parent = dd->pcidev->bus->self; u32 up, dn; + /* + * If the driver does not have access to the upstream component, + * it cannot support ASPM L1 at all. + */ + if (!parent) + return false; + pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn); dn = ASPM_L1_SUPPORTED(dn); @@ -98,6 +105,13 @@ static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd) { struct pci_dev *parent = dd->pcidev->bus->self; + /* + * If the driver does not have access to the upstream component, + * it cannot support ASPM L1 at all. 
+ */ + if (!parent) + return; + /* Enable ASPM L1 first in upstream component and then downstream */ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_ASPMC, @@ -114,8 +128,9 @@ static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd) /* Disable ASPM L1 first in downstream component and then upstream */ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_ASPMC, 0x0); - pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, 0x0); + if (parent) + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0x0); } static inline void aspm_enable(struct hfi1_devdata *dd) diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 7855962..1adfa8b 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -362,6 +362,7 @@ static void update_lbus_info(struct hfi1_devdata *dd) int pcie_speeds(struct hfi1_devdata *dd) { u32 linkcap; + struct pci_dev *parent = dd->pcidev->bus->self; if (!pci_is_pcie(dd->pcidev)) { dd_dev_err(dd, "Can't find PCI Express capability!\n"); @@ -382,7 +383,7 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed */ - if (dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { + if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); dd->link_gen3_capable = 0; } @@ -471,6 +472,12 @@ static void tune_pcie_caps(struct hfi1_devdata *dd) } /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; + /* + * The driver cannot perform the tuning if it does not have + * access to the upstream component. + */ + if (!parent) + return; if (!pci_is_root_bus(parent->bus)) { dd_dev_info(dd, "Parent not root\n"); return; @@ -939,7 +946,7 @@ static void write_xmt_margin(struct hfi1_devdata *dd, const char *fname) */ int do_pcie_gen3_transition(struct hfi1_devdata *dd) { - struct pci_dev *parent; + struct pci_dev *parent = dd->pcidev->bus->self; u64 fw_ctrl; u64 reg, therm; u32 reg32, fs, lf; @@ -982,6 +989,16 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) } /* + * The driver cannot do the transition if it has no access to the + * upstream component + */ + if (!parent) { + dd_dev_info(dd, "%s: No upstream, Can't do gen3 transition\n", + __func__); + return 0; + } + + /* * Do the Gen3 transition. Steps are those of the PCIe Gen3 * recipe. */ @@ -1157,7 +1174,6 @@ retry: * that it is Gen3 capable earlier. */ dd_dev_info(dd, "%s: setting parent target link speed\n", __func__); - parent = dd->pcidev->bus->self; pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2); dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, (u32)lnkctl2); -- cgit v0.10.2 From 24487dd39cb24c23560c2dc726c6d3375f42a697 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Fri, 26 Feb 2016 13:33:23 -0800 Subject: staging/rdma/hfi1: Check interrupt registers mapping This patch tests the interrupt registers when the driver has no access to its upstream component. In this case, it is highly likely that it is running in a virtual machine (eg, Qemu-kvm guest). If the interrupt registers are not mapped properly by the virtual machine monitor, an error message will be printed and the probing will be terminated. This will help the user identify the issue. 
On the other hand, if the driver is running in a host or has access to its upstream component in some other VM, it will do nothing. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index fe73ebf..77996527 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13943,6 +13943,50 @@ static int obtain_boardname(struct hfi1_devdata *dd) return 0; } +/* + * Check the interrupt registers to make sure that they are mapped correctly. + * It is intended to help user identify any mismapping by VMM when the driver + * is running in a VM. This function should only be called before interrupt + * is set up properly. + * + * Return 0 on success, -EINVAL on failure. + */ +static int check_int_registers(struct hfi1_devdata *dd) +{ + u64 reg; + u64 all_bits = ~(u64)0; + u64 mask; + + /* Clear CceIntMask[0] to avoid raising any interrupts */ + mask = read_csr(dd, CCE_INT_MASK); + write_csr(dd, CCE_INT_MASK, 0ull); + reg = read_csr(dd, CCE_INT_MASK); + if (reg) + goto err_exit; + + /* Clear all interrupt status bits */ + write_csr(dd, CCE_INT_CLEAR, all_bits); + reg = read_csr(dd, CCE_INT_STATUS); + if (reg) + goto err_exit; + + /* Set all interrupt status bits */ + write_csr(dd, CCE_INT_FORCE, all_bits); + reg = read_csr(dd, CCE_INT_STATUS); + if (reg != all_bits) + goto err_exit; + + /* Restore the interrupt mask */ + write_csr(dd, CCE_INT_CLEAR, all_bits); + write_csr(dd, CCE_INT_MASK, mask); + + return 0; +err_exit: + write_csr(dd, CCE_INT_MASK, mask); + dd_dev_err(dd, "Interrupt registers not properly mapped by VMM\n"); + return -EINVAL; +} + /** * Allocate and initialize the device structure for the hfi. * @dev: the pci_dev for hfi1_ib device @@ -13967,6 +14011,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, "RTL FPGA emulation", "Functional simulator" }; + struct pci_dev *parent = pdev->bus->self; dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * sizeof(struct hfi1_pportdata)); @@ -14046,6 +14091,17 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, & CCE_REVISION_CHIP_REV_MINOR_MASK; /* + * Check interrupt registers mapping if the driver has no access to + * the upstream component. In this case, it is likely that the driver + * is running in a VM. + */ + if (!parent) { + ret = check_int_registers(dd); + if (ret) + goto bail_cleanup; + } + + /* * obtain the hardware ID - NOT related to unit, which is a * software enumeration */ -- cgit v0.10.2 From 409b146225cdefcc76d9956e323e84e510208884 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Fri, 26 Feb 2016 13:33:28 -0800 Subject: staging/rdma/hfi1: Fix reporting of LED status in Get(LedInfo) and Get(PortInfo) The LedInfo SMA attribute is redefined to control the LED beaconing state machine instead of the LED directly. In accordance, we now return the state of LED beaconing, represented by whether the beaconing timer is active, instead of the state of the LED itself for SMA queries Get(LedInfo) and Get(PortInfo). While we are at it, we fix the beaconing timer control code so that the state of the timer is accurately updated. 
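[ Rough userspace C11 analogue of the reporting pattern, not driver code: the reply bit is derived from the software "timer active" flag behind a full memory fence, which is the role smp_mb() and atomic_read() play in the patch. EXAMPLE_LED_SHIFT is a made-up bit position, not the OPA_LED_SHIFT value. ]

#include <stdatomic.h>
#include <stdint.h>

#define EXAMPLE_LED_SHIFT 6	/* illustrative bit position only */

atomic_int led_timer_active;	/* stands in for led_override_timer_active */

uint32_t report_led_bit(void)
{
	/* full fence: pairs with the updater's barrier, as the patch comments describe */
	atomic_thread_fence(memory_order_seq_cst);
	return (atomic_load(&led_timer_active) ? 1u : 0u) << EXAMPLE_LED_SHIFT;
}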
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Easwar Hariharan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 76ed5f7..4581864 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -1169,6 +1169,12 @@ void shutdown_led_override(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; + /* + * This pairs with the memory barrier implied by the atomic_dec in + * hfi1_set_led_override to ensure that we read the correct state of + * LED beaconing represented by led_override_timer_active + */ + smp_mb(); if (atomic_read(&ppd->led_override_timer_active)) { del_timer_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); @@ -1199,11 +1205,14 @@ static void run_led_override(unsigned long opaque) * don't re-fire the timer if user asked for it to be off; we let * it fire one more time after they turn it off to simplify */ - if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) + if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) { mod_timer(&ppd->led_override_timer, jiffies + timeout); - else + } else { /* Hand control of the LED to the DC for normal operation */ write_csr(dd, DCC_CFG_LED_CNTRL, 0); + /* Record that we did not re-fire the timer */ + atomic_dec(&ppd->led_override_timer_active); + } } /* diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 78931fc..5925798 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -516,6 +516,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct opa_port_info *pi = (struct opa_port_info *)data; u8 mtu; u8 credit_rate; + u8 is_beaconing_active; u32 state; u32 num_ports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -581,6 +582,14 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4; pi->port_states.ledenable_offlinereason |= ppd->is_sm_config_started << 5; + /* + * This pairs with the memory barrier implied by the atomic_dec in + * hfi1_set_led_override to ensure that we read the correct state of + * LED beaconing represented by led_override_timer_active + */ + smp_mb(); + is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active); + pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6; pi->port_states.ledenable_offlinereason |= ppd->offline_disabled_reason; #else @@ -3578,19 +3587,24 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, u32 *resp_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + struct hfi1_pportdata *ppd = dd->pport; struct opa_led_info *p = (struct opa_led_info *)data; u32 nport = OPA_AM_NPORT(am); - u64 reg; + u32 is_beaconing_active; if (nport != 1) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } - reg = read_csr(dd, DCC_CFG_LED_CNTRL); - if ((reg & DCC_CFG_LED_CNTRL_LED_CNTRL_SMASK) && - ((reg & DCC_CFG_LED_CNTRL_LED_SW_BLINK_RATE_SMASK) == 0xf)) - p->rsvd_led_mask = cpu_to_be32(OPA_LED_MASK); + /* + * This pairs with the memory barrier implied by the atomic_dec in + * hfi1_set_led_override to ensure that we read the correct state of + * LED beaconing represented by led_override_timer_active + */ + smp_mb(); + is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active); + p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << 
OPA_LED_SHIFT); if (resp_len) *resp_len += sizeof(struct opa_led_info); -- cgit v0.10.2 From 79d0c088801b221330ee3b75cd10912003e3c6dd Mon Sep 17 00:00:00 2001 From: Jubin John Date: Fri, 26 Feb 2016 13:33:33 -0800 Subject: staging/rdma/hfi1: Fix memory leaks Fix 3 memory leaks reported by the LeakCheck tool in the KEDR framework. The following resources were allocated memory during their respective initializations but not freed during cleanup: 1. SDMA map elements 2. PIO map elements 3. HW send context to SW index map This patch fixes the memory leaks by freeing the allocated memory in the cleanup path. Reviewed-by: Dean Luick Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 371ed29..37b3ce8 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1324,6 +1324,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd) dd->num_send_contexts = 0; kfree(dd->send_contexts); dd->send_contexts = NULL; + kfree(dd->hw_to_sw); + dd->hw_to_sw = NULL; kfree(dd->boardname); vfree(dd->events); vfree(dd->status); diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 859cb46..361b43d 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -1881,7 +1881,7 @@ void free_pio_map(struct hfi1_devdata *dd) /* Free PIO map if allocated */ if (rcu_access_pointer(dd->pio_map)) { spin_lock_irq(&dd->pio_map_lock); - kfree(rcu_access_pointer(dd->pio_map)); + pio_map_free(rcu_access_pointer(dd->pio_map)); RCU_INIT_POINTER(dd->pio_map, NULL); spin_unlock_irq(&dd->pio_map_lock); synchronize_rcu(); diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index e29b5d3..abb8ebc 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -966,7 +966,7 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) sde->tx_ring = NULL; } spin_lock_irq(&dd->sde_map_lock); - kfree(rcu_access_pointer(dd->sdma_map)); + sdma_map_free(rcu_access_pointer(dd->sdma_map)); RCU_INIT_POINTER(dd->sdma_map, NULL); spin_unlock_irq(&dd->sde_map_lock); synchronize_rcu(); -- cgit v0.10.2 From 7cf20fc62428367bbf853a8d968804a6ec6a4973 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Mar 2016 09:36:06 +0100 Subject: net/9p: convert to new CQ API Trivial conversion to the new RDMA CQ API. Signed-off-by: Christoph Hellwig Acked-by: Dominique Martinet Signed-off-by: Doug Ledford diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 52b4a2f..1852e38 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -109,14 +109,13 @@ struct p9_trans_rdma { /** * p9_rdma_context - Keeps track of in-process WR * - * @wc_op: The original WR op for when the CQE completes in error. 
* @busa: Bus address to unmap when the WR completes * @req: Keeps track of requests (send) * @rc: Keepts track of replies (receive) */ struct p9_rdma_req; struct p9_rdma_context { - enum ib_wc_opcode wc_op; + struct ib_cqe cqe; dma_addr_t busa; union { struct p9_req_t *req; @@ -284,9 +283,12 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) } static void -handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, - struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +recv_done(struct ib_cq *cq, struct ib_wc *wc) { + struct p9_client *client = cq->cq_context; + struct p9_trans_rdma *rdma = client->trans; + struct p9_rdma_context *c = + container_of(wc->wr_cqe, struct p9_rdma_context, cqe); struct p9_req_t *req; int err = 0; int16_t tag; @@ -295,7 +297,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, DMA_FROM_DEVICE); - if (status != IB_WC_SUCCESS) + if (wc->status != IB_WC_SUCCESS) goto err_out; err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); @@ -316,21 +318,32 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, req->rc = c->rc; p9_client_cb(client, req, REQ_STATUS_RCVD); + out: + up(&rdma->rq_sem); + kfree(c); return; err_out: - p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); + p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", + req, err, wc->status); rdma->state = P9_RDMA_FLUSHING; client->status = Disconnected; + goto out; } static void -handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, - struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +send_done(struct ib_cq *cq, struct ib_wc *wc) { + struct p9_client *client = cq->cq_context; + struct p9_trans_rdma *rdma = client->trans; + struct p9_rdma_context *c = + container_of(wc->wr_cqe, struct p9_rdma_context, cqe); + ib_dma_unmap_single(rdma->cm_id->device, c->busa, c->req->tc->size, DMA_TO_DEVICE); + up(&rdma->sq_sem); + kfree(c); } static void qp_event_handler(struct ib_event *event, void *context) @@ -339,42 +352,6 @@ static void qp_event_handler(struct ib_event *event, void *context) event->event, context); } -static void cq_comp_handler(struct ib_cq *cq, void *cq_context) -{ - struct p9_client *client = cq_context; - struct p9_trans_rdma *rdma = client->trans; - int ret; - struct ib_wc wc; - - ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); - while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; - - switch (c->wc_op) { - case IB_WC_RECV: - handle_recv(client, rdma, c, wc.status, wc.byte_len); - up(&rdma->rq_sem); - break; - - case IB_WC_SEND: - handle_send(client, rdma, c, wc.status, wc.byte_len); - up(&rdma->sq_sem); - break; - - default: - pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n", - c->wc_op, wc.opcode, wc.status); - break; - } - kfree(c); - } -} - -static void cq_event_handler(struct ib_event *e, void *v) -{ - p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); -} - static void rdma_destroy_trans(struct p9_trans_rdma *rdma) { if (!rdma) @@ -387,7 +364,7 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma) ib_dealloc_pd(rdma->pd); if (rdma->cq && !IS_ERR(rdma->cq)) - ib_destroy_cq(rdma->cq); + ib_free_cq(rdma->cq); if (rdma->cm_id && !IS_ERR(rdma->cm_id)) rdma_destroy_id(rdma->cm_id); @@ -408,13 +385,14 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) if (ib_dma_mapping_error(rdma->cm_id->device, 
c->busa)) goto error; + c->cqe.done = recv_done; + sge.addr = c->busa; sge.length = client->msize; sge.lkey = rdma->pd->local_dma_lkey; wr.next = NULL; - c->wc_op = IB_WC_RECV; - wr.wr_id = (unsigned long) c; + wr.wr_cqe = &c->cqe; wr.sg_list = &sge; wr.num_sge = 1; return ib_post_recv(rdma->qp, &wr, &bad_wr); @@ -499,13 +477,14 @@ dont_need_post_recv: goto send_error; } + c->cqe.done = send_done; + sge.addr = c->busa; sge.length = c->req->tc->size; sge.lkey = rdma->pd->local_dma_lkey; wr.next = NULL; - c->wc_op = IB_WC_SEND; - wr.wr_id = (unsigned long) c; + wr.wr_cqe = &c->cqe; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; wr.sg_list = &sge; @@ -642,7 +621,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) struct p9_trans_rdma *rdma; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; - struct ib_cq_init_attr cq_attr = {}; /* Parse the transport specific mount options */ err = parse_opts(args, &opts); @@ -695,13 +673,11 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) goto error; /* Create the Completion Queue */ - cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; - rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, - cq_event_handler, client, - &cq_attr); + rdma->cq = ib_alloc_cq(rdma->cm_id->device, client, + opts.sq_depth + opts.rq_depth + 1, + 0, IB_POLL_SOFTIRQ); if (IS_ERR(rdma->cq)) goto error; - ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); /* Create the Protection Domain */ rdma->pd = ib_alloc_pd(rdma->cm_id->device); -- cgit v0.10.2 From b493d91d333e867a043f7ff1397bcba6e2d0dda2 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Fri, 26 Feb 2016 09:18:00 -0600 Subject: iwcm: common code for port mapper moved port mapper related code from drivers into common code Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana E. 
Nikolova Signed-off-by: Faisal Latif Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index ff9163d..e28a160 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -50,6 +50,8 @@ #include #include +#include +#include #include "iwcm.h" @@ -57,6 +59,16 @@ MODULE_AUTHOR("Tom Tucker"); MODULE_DESCRIPTION("iWARP CM"); MODULE_LICENSE("Dual BSD/GPL"); +static struct ibnl_client_cbs iwcm_nl_cb_table[] = { + [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, + [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, + [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, + [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} +}; + static struct workqueue_struct *iwcm_wq; struct iwcm_work { struct work_struct work; @@ -402,6 +414,11 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (cm_id->mapped) { + iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); + iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); + } + (void)iwcm_deref_id(cm_id_priv); } @@ -426,6 +443,97 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id) } EXPORT_SYMBOL(iw_destroy_cm_id); +/** + * iw_cm_check_wildcard - If IP address is 0 then use original + * @pm_addr: sockaddr containing the ip to check for wildcard + * @cm_addr: sockaddr containing the actual IP address + * @cm_outaddr: sockaddr to set IP addr which leaving port + * + * Checks the pm_addr for wildcard and then sets cm_outaddr's + * IP to the actual (cm_addr). + */ +static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, + struct sockaddr_storage *cm_addr, + struct sockaddr_storage *cm_outaddr) +{ + if (pm_addr->ss_family == AF_INET) { + struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; + + if (pm4_addr->sin_addr.s_addr == INADDR_ANY) { + struct sockaddr_in *cm4_addr = + (struct sockaddr_in *)cm_addr; + struct sockaddr_in *cm4_outaddr = + (struct sockaddr_in *)cm_outaddr; + + cm4_outaddr->sin_addr = cm4_addr->sin_addr; + } + } else { + struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; + + if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { + struct sockaddr_in6 *cm6_addr = + (struct sockaddr_in6 *)cm_addr; + struct sockaddr_in6 *cm6_outaddr = + (struct sockaddr_in6 *)cm_outaddr; + + cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; + } + } +} + +/** + * iw_cm_map - Use portmapper to map the ports + * @cm_id: connection manager pointer + * @active: Indicates the active side when true + * returns nonzero for error only if iwpm_create_mapinfo() fails + * + * Tries to add a mapping for a port using the Portmapper. If + * successful in mapping the IP/Port it will check the remote + * mapped IP address for a wildcard IP address and replace the + * zero IP address with the remote_addr. 
+ */ +static int iw_cm_map(struct iw_cm_id *cm_id, bool active) +{ + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + int status; + + cm_id->m_local_addr = cm_id->local_addr; + cm_id->m_remote_addr = cm_id->remote_addr; + + memcpy(pm_reg_msg.dev_name, cm_id->device->name, + sizeof(pm_reg_msg.dev_name)); + memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, + sizeof(pm_reg_msg.if_name)); + + if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || + !iwpm_valid_pid()) + return 0; + + cm_id->mapped = true; + pm_msg.loc_addr = cm_id->local_addr; + pm_msg.rem_addr = cm_id->remote_addr; + if (active) + status = iwpm_add_and_query_mapping(&pm_msg, + RDMA_NL_IWCM); + else + status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); + + if (!status) { + cm_id->m_local_addr = pm_msg.mapped_loc_addr; + if (active) { + cm_id->m_remote_addr = pm_msg.mapped_rem_addr; + iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, + &cm_id->remote_addr, + &cm_id->m_remote_addr); + } + } + + return iwpm_create_mapinfo(&cm_id->local_addr, + &cm_id->m_local_addr, + RDMA_NL_IWCM); +} + /* * CM_ID <-- LISTEN * @@ -452,7 +560,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + ret = iw_cm_map(cm_id, false); + if (!ret) + ret = cm_id->device->iwcm->create_listen(cm_id, backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); @@ -582,39 +692,37 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_IDLE) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - wake_up_all(&cm_id_priv->connect_wait); - return -EINVAL; + ret = -EINVAL; + goto err; } /* Get the ib_qp given the QPN */ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - wake_up_all(&cm_id_priv->connect_wait); - return -EINVAL; + ret = -EINVAL; + goto err; } cm_id->device->iwcm->add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->connect(cm_id, iw_param); - if (ret) { - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->iwcm->rem_ref(qp); - cm_id_priv->qp = NULL; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); - cm_id_priv->state = IW_CM_STATE_IDLE; - clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - wake_up_all(&cm_id_priv->connect_wait); - } + ret = iw_cm_map(cm_id, true); + if (!ret) + ret = cm_id->device->iwcm->connect(cm_id, iw_param); + if (!ret) + return 0; /* success */ + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + cm_id_priv->state = IW_CM_STATE_IDLE; +err: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_connect); @@ -656,8 +764,23 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, goto out; cm_id->provider_data = iw_event->provider_data; - cm_id->local_addr = iw_event->local_addr; - 
cm_id->remote_addr = iw_event->remote_addr; + cm_id->m_local_addr = iw_event->local_addr; + cm_id->m_remote_addr = iw_event->remote_addr; + cm_id->local_addr = listen_id_priv->id.local_addr; + + ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, + &iw_event->remote_addr, + &cm_id->remote_addr, + RDMA_NL_IWCM); + if (ret) { + cm_id->remote_addr = iw_event->remote_addr; + } else { + iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, + &iw_event->local_addr, + &cm_id->local_addr); + iw_event->local_addr = cm_id->local_addr; + iw_event->remote_addr = cm_id->remote_addr; + } cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; @@ -753,8 +876,10 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); if (iw_event->status == 0) { - cm_id_priv->id.local_addr = iw_event->local_addr; - cm_id_priv->id.remote_addr = iw_event->remote_addr; + cm_id_priv->id.m_local_addr = iw_event->local_addr; + cm_id_priv->id.m_remote_addr = iw_event->remote_addr; + iw_event->local_addr = cm_id_priv->id.local_addr; + iw_event->remote_addr = cm_id_priv->id.remote_addr; cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ @@ -1044,6 +1169,17 @@ EXPORT_SYMBOL(iw_cm_init_qp_attr); static int __init iw_cm_init(void) { + int ret; + + ret = iwpm_init(RDMA_NL_IWCM); + if (ret) + pr_err("iw_cm: couldn't init iwpm\n"); + + ret = ibnl_add_client(RDMA_NL_IWCM, RDMA_NL_IWPM_NUM_OPS, + iwcm_nl_cb_table); + if (ret) + pr_err("iw_cm: couldn't register netlink callbacks\n"); + iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); if (!iwcm_wq) return -ENOMEM; @@ -1063,6 +1199,8 @@ static void __exit iw_cm_cleanup(void) { unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); + ibnl_remove_client(RDMA_NL_IWCM); + iwpm_exit(RDMA_NL_IWCM); } module_init(iw_cm_init); diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 22a3abe..c2b4ce6 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -88,7 +88,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ); if (ret) goto pid_query_error; - ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE, + ret = ibnl_put_attr(skb, nlh, IFNAMSIZ, pm_msg->if_name, IWPM_NLA_REG_IF_NAME); if (ret) goto pid_query_error; diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 036bd27..6d0065c 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -83,8 +83,10 @@ struct iw_cm_id { iw_cm_handler cm_handler; /* client callback function */ void *context; /* client cb context */ struct ib_device *device; - struct sockaddr_storage local_addr; + struct sockaddr_storage local_addr; /* local addr */ struct sockaddr_storage remote_addr; + struct sockaddr_storage m_local_addr; /* nmapped local addr */ + struct sockaddr_storage m_remote_addr; /* nmapped rem addr */ void *provider_data; /* provider private data */ iw_event_handler event_handler; /* cb for provider events */ @@ -92,6 +94,7 @@ struct iw_cm_id { void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); u8 tos; + bool mapped; }; struct iw_cm_conn_param { @@ -123,6 +126,7 @@ struct iw_cm_verbs { int backlog); int (*destroy_listen)(struct iw_cm_id *cm_id); + char ifname[IFNAMSIZ]; }; /** diff --git a/include/uapi/rdma/rdma_netlink.h 
b/include/uapi/rdma/rdma_netlink.h index c19a5dc..f7d7b6f 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -5,8 +5,8 @@ enum { RDMA_NL_RDMA_CM = 1, - RDMA_NL_NES, - RDMA_NL_C4IW, + RDMA_NL_IWCM, + RDMA_NL_RSVD, RDMA_NL_LS, /* RDMA Local Services */ RDMA_NL_NUM_CLIENTS }; -- cgit v0.10.2 From 6a0dde89cc346588f7f610dbc0d75e100f9cb568 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Fri, 26 Feb 2016 09:18:02 -0600 Subject: iw_nes: remove port mapper related code Now that most of the port mapper code been moved to iwcm, we can remove it from port mapper service user drivers. Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana E. Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 9f9d5c5..35cbb17 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -111,17 +111,6 @@ static struct pci_device_id nes_pci_table[] = { MODULE_DEVICE_TABLE(pci, nes_pci_table); -/* registered nes netlink callbacks */ -static struct ibnl_client_cbs nes_nl_cb_table[] = { - [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, - [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, - [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, - [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, - [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, - [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, - [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} -}; - static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); static int nes_net_event(struct notifier_block *, unsigned long, void *); static int nes_notifiers_registered; @@ -682,17 +671,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) } nes_notifiers_registered++; - if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table)) - printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n", - __func__, __LINE__); - - ret = iwpm_init(RDMA_NL_NES); - if (ret) { - printk(KERN_ERR PFX "%s: port mapper initialization failed\n", - pci_name(pcidev)); - goto bail7; - } - INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); /* Initialize network devices */ @@ -731,7 +709,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", nesdev->netdev_count, nesdev->nesadapter->netdev_count); - ibnl_remove_client(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { @@ -795,8 +772,6 @@ static void nes_remove(struct pci_dev *pcidev) nesdev->nesadapter->netdev_count--; } } - ibnl_remove_client(RDMA_NL_NES); - iwpm_exit(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index cb9f0f2..7374bed 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -482,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb, iph->ttl = 0x40; iph->protocol = 0x06; /* IPPROTO_TCP */ - iph->saddr = htonl(cm_node->mapped_loc_addr); - iph->daddr = htonl(cm_node->mapped_rem_addr); + iph->saddr = htonl(cm_node->loc_addr); + iph->daddr = htonl(cm_node->rem_addr); - tcph->source = htons(cm_node->mapped_loc_port); - tcph->dest = htons(cm_node->mapped_rem_port); + tcph->source = htons(cm_node->loc_port); + tcph->dest = htons(cm_node->rem_port); 
tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); if (flags & SET_ACK) { @@ -525,125 +525,6 @@ static void form_cm_frame(struct sk_buff *skb, cm_packets_created++; } -/* - * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct - */ -static void nes_create_sockaddr(__be32 ip_addr, __be16 port, - struct sockaddr_storage *addr) -{ - struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr; - nes_sockaddr->sin_family = AF_INET; - memcpy(&nes_sockaddr->sin_addr.s_addr, &ip_addr, sizeof(__be32)); - nes_sockaddr->sin_port = port; -} - -/* - * nes_create_mapinfo - Create a mapinfo object in the port mapper data base - */ -static int nes_create_mapinfo(struct nes_cm_info *cm_info) -{ - struct sockaddr_storage local_sockaddr; - struct sockaddr_storage mapped_sockaddr; - - nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port), - &local_sockaddr); - nes_create_sockaddr(htonl(cm_info->mapped_loc_addr), - htons(cm_info->mapped_loc_port), &mapped_sockaddr); - - return iwpm_create_mapinfo(&local_sockaddr, - &mapped_sockaddr, RDMA_NL_NES); -} - -/* - * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base - * and send a remove mapping op message to - * the userspace port mapper - */ -static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port, - u32 mapped_loc_addr, u16 mapped_loc_port) -{ - struct sockaddr_storage local_sockaddr; - struct sockaddr_storage mapped_sockaddr; - - nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local_sockaddr); - nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port), - &mapped_sockaddr); - - iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr); - return iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES); -} - -/* - * nes_form_pm_msg - Form a port mapper message with mapping info - */ -static void nes_form_pm_msg(struct nes_cm_info *cm_info, - struct iwpm_sa_data *pm_msg) -{ - nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port), - &pm_msg->loc_addr); - nes_create_sockaddr(htonl(cm_info->rem_addr), htons(cm_info->rem_port), - &pm_msg->rem_addr); -} - -/* - * nes_form_reg_msg - Form a port mapper message with dev info - */ -static void nes_form_reg_msg(struct nes_vnic *nesvnic, - struct iwpm_dev_data *pm_msg) -{ - memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name, - IWPM_DEVNAME_SIZE); - memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE); -} - -static void record_sockaddr_info(struct sockaddr_storage *addr_info, - nes_addr_t *ip_addr, u16 *port_num) -{ - struct sockaddr_in *in_addr = (struct sockaddr_in *)addr_info; - - if (in_addr->sin_family == AF_INET) { - *ip_addr = ntohl(in_addr->sin_addr.s_addr); - *port_num = ntohs(in_addr->sin_port); - } -} - -/* - * nes_record_pm_msg - Save the received mapping info - */ -static void nes_record_pm_msg(struct nes_cm_info *cm_info, - struct iwpm_sa_data *pm_msg) -{ - record_sockaddr_info(&pm_msg->mapped_loc_addr, - &cm_info->mapped_loc_addr, &cm_info->mapped_loc_port); - - record_sockaddr_info(&pm_msg->mapped_rem_addr, - &cm_info->mapped_rem_addr, &cm_info->mapped_rem_port); -} - -/* - * nes_get_reminfo - Get the address info of the remote connecting peer - */ -static int nes_get_remote_addr(struct nes_cm_node *cm_node) -{ - struct sockaddr_storage mapped_loc_addr, mapped_rem_addr; - struct sockaddr_storage remote_addr; - int ret; - - nes_create_sockaddr(htonl(cm_node->mapped_loc_addr), - htons(cm_node->mapped_loc_port), &mapped_loc_addr); - nes_create_sockaddr(htonl(cm_node->mapped_rem_addr), - 
htons(cm_node->mapped_rem_port), &mapped_rem_addr); - - ret = iwpm_get_remote_info(&mapped_loc_addr, &mapped_rem_addr, - &remote_addr, RDMA_NL_NES); - if (ret) - nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n"); - else - record_sockaddr_info(&remote_addr, &cm_node->rem_addr, - &cm_node->rem_port); - return ret; -} - /** * print_core - dump a cm core */ @@ -1266,11 +1147,10 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, loc_addr, loc_port, cm_node->rem_addr, cm_node->rem_port, rem_addr, rem_port); - if ((cm_node->mapped_loc_addr == loc_addr) && - (cm_node->mapped_loc_port == loc_port) && - (cm_node->mapped_rem_addr == rem_addr) && - (cm_node->mapped_rem_port == rem_port)) { - + if ((cm_node->loc_addr == loc_addr) && + (cm_node->loc_port == loc_port) && + (cm_node->rem_addr == rem_addr) && + (cm_node->rem_port == rem_port)) { add_ref_cm_node(cm_node); spin_unlock_irqrestore(&cm_core->ht_lock, flags); return cm_node; @@ -1287,8 +1167,8 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, * find_listener - find a cm node listening on this addr-port pair */ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, - nes_addr_t dst_addr, u16 dst_port, - enum nes_cm_listener_state listener_state, int local) + nes_addr_t dst_addr, u16 dst_port, + enum nes_cm_listener_state listener_state) { unsigned long flags; struct nes_cm_listener *listen_node; @@ -1298,13 +1178,9 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { - if (local) { - listen_addr = listen_node->loc_addr; - listen_port = listen_node->loc_port; - } else { - listen_addr = listen_node->mapped_loc_addr; - listen_port = listen_node->mapped_loc_port; - } + listen_addr = listen_node->loc_addr; + listen_port = listen_node->loc_port; + /* compare node pair, return node handle if a match */ if (((listen_addr == dst_addr) || listen_addr == 0x00000000) && @@ -1443,17 +1319,13 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, if (listener->nesvnic) { nes_manage_apbvt(listener->nesvnic, - listener->mapped_loc_port, + listener->loc_port, PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - nes_remove_mapinfo(listener->loc_addr, - listener->loc_port, - listener->mapped_loc_addr, - listener->mapped_loc_port); nes_debug(NES_DBG_NLMSG, - "Delete APBVT mapped_loc_port = %04X\n", - listener->mapped_loc_port); + "Delete APBVT loc_port = %04X\n", + listener->loc_port); } nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); @@ -1602,11 +1474,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->rem_addr = cm_info->rem_addr; cm_node->rem_port = cm_info->rem_port; - cm_node->mapped_loc_addr = cm_info->mapped_loc_addr; - cm_node->mapped_rem_addr = cm_info->mapped_rem_addr; - cm_node->mapped_loc_port = cm_info->mapped_loc_port; - cm_node->mapped_rem_port = cm_info->mapped_rem_port; - cm_node->mpa_frame_rev = mpa_version; cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; cm_node->mpav2_ird_ord = 0; @@ -1655,10 +1522,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loopbackpartner = NULL; /* get the mac addr for the remote node */ - oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr, - NULL, NES_ARP_RESOLVE); - arpindex = nes_addr_resolve_neigh(nesvnic, - 
cm_node->mapped_rem_addr, oldarpindex); + oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, + NULL, NES_ARP_RESOLVE); + arpindex = nes_addr_resolve_neigh(nesvnic, cm_node->rem_addr, + oldarpindex); if (arpindex < 0) { kfree(cm_node); return NULL; @@ -1720,14 +1587,12 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); } else { if (cm_node->apbvt_set && cm_node->nesvnic) { - nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port, + nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); } - nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n", - cm_node->mapped_loc_port); - nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port, - cm_node->mapped_loc_addr, cm_node->mapped_loc_port); + nes_debug(NES_DBG_NLMSG, "Delete APBVT loc_port = %04X\n", + cm_node->loc_port); } atomic_dec(&cm_core->node_cnt); @@ -2184,7 +2049,6 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->state = NES_CM_STATE_ESTABLISHED; if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; - nes_get_remote_addr(cm_node); handle_rcv_mpa(cm_node, skb); } else { /* rcvd ACK only */ dev_kfree_skb_any(skb); @@ -2399,17 +2263,14 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) { struct nes_cm_listener *listener; - struct iwpm_dev_data pm_reg_msg; - struct iwpm_sa_data pm_msg; unsigned long flags; - int iwpm_err = 0; nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", cm_info->loc_addr, cm_info->loc_port); /* cannot have multiple matching listeners */ listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port, - NES_CM_LISTENER_EITHER_STATE, 1); + NES_CM_LISTENER_EITHER_STATE); if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { /* find automatically incs ref count ??? 
*/ @@ -2419,22 +2280,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, } if (!listener) { - nes_form_reg_msg(nesvnic, &pm_reg_msg); - iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES); - if (iwpm_err) { - nes_debug(NES_DBG_NLMSG, - "Port Mapper reg pid fail (err = %d).\n", iwpm_err); - } - if (iwpm_valid_pid() && !iwpm_err) { - nes_form_pm_msg(cm_info, &pm_msg); - iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES); - if (iwpm_err) - nes_debug(NES_DBG_NLMSG, - "Port Mapper query fail (err = %d).\n", iwpm_err); - else - nes_record_pm_msg(cm_info, &pm_msg); - } - /* create a CM listen node (1/2 node to compare incoming traffic to) */ listener = kzalloc(sizeof(*listener), GFP_ATOMIC); if (!listener) { @@ -2444,8 +2289,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, listener->loc_addr = cm_info->loc_addr; listener->loc_port = cm_info->loc_port; - listener->mapped_loc_addr = cm_info->mapped_loc_addr; - listener->mapped_loc_port = cm_info->mapped_loc_port; listener->reused_node = 0; atomic_set(&listener->ref_count, 1); @@ -2507,18 +2350,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, if (cm_info->loc_addr == cm_info->rem_addr) { loopbackremotelistener = find_listener(cm_core, - cm_node->mapped_loc_addr, cm_node->mapped_rem_port, - NES_CM_LISTENER_ACTIVE_STATE, 0); + cm_node->loc_addr, cm_node->rem_port, + NES_CM_LISTENER_ACTIVE_STATE); if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { loopback_cm_info = *cm_info; loopback_cm_info.loc_port = cm_info->rem_port; loopback_cm_info.rem_port = cm_info->loc_port; - loopback_cm_info.mapped_loc_port = - cm_info->mapped_rem_port; - loopback_cm_info.mapped_rem_port = - cm_info->mapped_loc_port; + loopback_cm_info.loc_port = + cm_info->rem_port; + loopback_cm_info.rem_port = + cm_info->loc_port; loopback_cm_info.cm_id = loopbackremotelistener->cm_id; loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, loopbackremotelistener); @@ -2747,12 +2590,6 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, nfo.rem_addr = ntohl(iph->saddr); nfo.rem_port = ntohs(tcph->source); - /* If port mapper is available these should be mapped address info */ - nfo.mapped_loc_addr = ntohl(iph->daddr); - nfo.mapped_loc_port = ntohs(tcph->dest); - nfo.mapped_rem_addr = ntohl(iph->saddr); - nfo.mapped_rem_port = ntohs(tcph->source); - tmp_daddr = cpu_to_be32(iph->daddr); tmp_saddr = cpu_to_be32(iph->saddr); @@ -2761,8 +2598,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, do { cm_node = find_node(cm_core, - nfo.mapped_rem_port, nfo.mapped_rem_addr, - nfo.mapped_loc_port, nfo.mapped_loc_addr); + nfo.rem_port, nfo.rem_addr, + nfo.loc_port, nfo.loc_addr); if (!cm_node) { /* Only type of packet accepted are for */ @@ -2771,9 +2608,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, skb_handled = 0; break; } - listener = find_listener(cm_core, nfo.mapped_loc_addr, - nfo.mapped_loc_port, - NES_CM_LISTENER_ACTIVE_STATE, 0); + listener = find_listener(cm_core, nfo.loc_addr, + nfo.loc_port, + NES_CM_LISTENER_ACTIVE_STATE); if (!listener) { nfo.cm_id = NULL; nfo.conn_type = 0; @@ -3121,8 +2958,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) atomic_inc(&cm_disconnects); cm_event.event = IW_CM_EVENT_DISCONNECT; cm_event.status = disconn_status; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = 
cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; @@ -3148,8 +2985,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) cm_event.event = IW_CM_EVENT_CLOSE; cm_event.status = 0; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; @@ -3240,8 +3077,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) u8 *start_ptr = &start_addr; u8 **start_buff = &start_ptr; u16 buff_len = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) @@ -3378,11 +3215,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nes_cm_init_tsa_conn(nesqp, cm_node); nesqp->nesqp_context->tcpPorts[0] = - cpu_to_le16(cm_node->mapped_loc_port); + cpu_to_le16(cm_node->loc_port); nesqp->nesqp_context->tcpPorts[1] = - cpu_to_le16(cm_node->mapped_rem_port); + cpu_to_le16(cm_node->rem_port); - nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); + nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr); nesqp->nesqp_context->misc2 |= cpu_to_le32( (u32)PCI_FUNC(nesdev->pcidev->devfn) << @@ -3406,9 +3243,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) memset(&nes_quad, 0, sizeof(nes_quad)); nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); - nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); - nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); + nes_quad.SrcIpadr = htonl(cm_node->rem_addr); + nes_quad.TcpPorts[0] = htons(cm_node->rem_port); + nes_quad.TcpPorts[1] = htons(cm_node->loc_port); /* Produce hash key */ crc_value = get_crc_value(&nes_quad); @@ -3437,8 +3274,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.event = IW_CM_EVENT_ESTABLISHED; cm_event.status = 0; cm_event.provider_data = (void *)nesqp; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; cm_event.ird = cm_node->ird_size; @@ -3508,11 +3345,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct nes_cm_node *cm_node; struct nes_cm_info cm_info; int apbvt_set = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; - struct iwpm_dev_data pm_reg_msg; - struct iwpm_sa_data pm_msg; - int iwpm_err = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; if (cm_id->remote_addr.ss_family != AF_INET) return -ENOSYS; @@ -3558,37 +3392,13 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_info.cm_id = cm_id; cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; - /* No port mapper available, go with the specified peer information */ - 
cm_info.mapped_loc_addr = cm_info.loc_addr; - cm_info.mapped_loc_port = cm_info.loc_port; - cm_info.mapped_rem_addr = cm_info.rem_addr; - cm_info.mapped_rem_port = cm_info.rem_port; - - nes_form_reg_msg(nesvnic, &pm_reg_msg); - iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES); - if (iwpm_err) { - nes_debug(NES_DBG_NLMSG, - "Port Mapper reg pid fail (err = %d).\n", iwpm_err); - } - if (iwpm_valid_pid() && !iwpm_err) { - nes_form_pm_msg(&cm_info, &pm_msg); - iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES); - if (iwpm_err) - nes_debug(NES_DBG_NLMSG, - "Port Mapper query fail (err = %d).\n", iwpm_err); - else - nes_record_pm_msg(&cm_info, &pm_msg); - } - if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) { - nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + nes_manage_apbvt(nesvnic, cm_info.loc_port, + PCI_FUNC(nesdev->pcidev->devfn), + NES_MANAGE_APBVT_ADD); apbvt_set = 1; } - if (nes_create_mapinfo(&cm_info)) - return -ENOMEM; - cm_id->add_ref(cm_id); /* create a connect CM node connection */ @@ -3597,14 +3407,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) &cm_info); if (!cm_node) { if (apbvt_set) - nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, + nes_manage_apbvt(nesvnic, cm_info.loc_port, PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n", - cm_info.mapped_loc_port); - nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port, - cm_info.mapped_loc_addr, cm_info.mapped_loc_port); + nes_debug(NES_DBG_NLMSG, "Delete loc_port = %04X\n", + cm_info.loc_port); cm_id->rem_ref(cm_id); return -ENOMEM; } @@ -3633,12 +3441,12 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) struct nes_cm_listener *cm_node; struct nes_cm_info cm_info; int err; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n", cm_id, ntohs(laddr->sin_port)); - if (cm_id->local_addr.ss_family != AF_INET) + if (cm_id->m_local_addr.ss_family != AF_INET) return -ENOSYS; nesvnic = to_nesvnic(cm_id->device); if (!nesvnic) @@ -3658,10 +3466,6 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; - /* No port mapper available, go with the specified info */ - cm_info.mapped_loc_addr = cm_info.loc_addr; - cm_info.mapped_loc_port = cm_info.loc_port; - cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); if (!cm_node) { printk(KERN_ERR "%s[%u] Error returned from listen API call\n", @@ -3673,10 +3477,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_node->tos = cm_id->tos; if (!cm_node->reused_node) { - if (nes_create_mapinfo(&cm_info)) - return -ENOMEM; - - err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port, + err = nes_manage_apbvt(nesvnic, cm_node->loc_port, PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); if (err) { @@ -3786,8 +3587,8 @@ static void cm_event_connected(struct nes_cm_event *event) nesvnic = to_nesvnic(nesqp->ibqp.device); nesdev = nesvnic->nesdev; nesadapter = nesdev->nesadapter; - laddr = (struct sockaddr_in *)&cm_id->local_addr; - raddr = (struct sockaddr_in *)&cm_id->remote_addr; + laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr; if (nesqp->destroyed) @@ -3802,10 
+3603,10 @@ static void cm_event_connected(struct nes_cm_event *event) /* set the QP tsa context */ nesqp->nesqp_context->tcpPorts[0] = - cpu_to_le16(cm_node->mapped_loc_port); + cpu_to_le16(cm_node->loc_port); nesqp->nesqp_context->tcpPorts[1] = - cpu_to_le16(cm_node->mapped_rem_port); - nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); + cpu_to_le16(cm_node->rem_port); + nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr); nesqp->nesqp_context->misc2 |= cpu_to_le32( (u32)PCI_FUNC(nesdev->pcidev->devfn) << @@ -3835,9 +3636,9 @@ static void cm_event_connected(struct nes_cm_event *event) nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); - nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); - nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); + nes_quad.SrcIpadr = htonl(cm_node->rem_addr); + nes_quad.TcpPorts[0] = htons(cm_node->rem_port); + nes_quad.TcpPorts[1] = htons(cm_node->loc_port); /* Produce hash key */ crc_value = get_crc_value(&nes_quad); @@ -3858,14 +3659,14 @@ static void cm_event_connected(struct nes_cm_event *event) cm_event.provider_data = cm_id->provider_data; cm_event_laddr->sin_family = AF_INET; cm_event_laddr->sin_port = laddr->sin_port; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size; cm_event.ird = cm_node->ird_size; cm_event.ord = cm_node->ord_size; - cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); + cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr); ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); @@ -3913,8 +3714,8 @@ static void cm_event_connect_error(struct nes_cm_event *event) cm_event.event = IW_CM_EVENT_CONNECT_REPLY; cm_event.status = -ECONNRESET; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; @@ -3970,8 +3771,8 @@ static void cm_event_reset(struct nes_cm_event *event) cm_event.event = IW_CM_EVENT_DISCONNECT; cm_event.status = -ECONNRESET; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; @@ -3981,8 +3782,8 @@ static void cm_event_reset(struct nes_cm_event *event) cm_event.event = IW_CM_EVENT_CLOSE; cm_event.status = 0; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; + cm_event.local_addr = cm_id->m_local_addr; + cm_event.remote_addr = cm_id->m_remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 147c2c8..d827d03 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -293,8 +293,8 @@ struct nes_cm_listener { struct list_head list; struct nes_cm_core *cm_core; u8 loc_mac[ETH_ALEN]; - nes_addr_t loc_addr, 
mapped_loc_addr; - u16 loc_port, mapped_loc_port; + nes_addr_t loc_addr; + u16 loc_port; struct iw_cm_id *cm_id; enum nes_cm_conn_type conn_type; atomic_t ref_count; @@ -309,9 +309,7 @@ struct nes_cm_listener { /* per connection node and node state information */ struct nes_cm_node { nes_addr_t loc_addr, rem_addr; - nes_addr_t mapped_loc_addr, mapped_rem_addr; u16 loc_port, rem_port; - u16 mapped_loc_port, mapped_rem_port; u8 loc_mac[ETH_ALEN]; u8 rem_mac[ETH_ALEN]; @@ -368,11 +366,6 @@ struct nes_cm_info { u16 rem_port; nes_addr_t loc_addr; nes_addr_t rem_addr; - u16 mapped_loc_port; - u16 mapped_rem_port; - nes_addr_t mapped_loc_addr; - nes_addr_t mapped_rem_addr; - enum nes_cm_conn_type conn_type; int backlog; }; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8c4daf7..804021d 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3768,6 +3768,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; nesibdev->ibdev.get_port_immutable = nes_port_immutable; + memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, + sizeof(nesibdev->ibdev.iwcm->ifname)); return nesibdev; } -- cgit v0.10.2 From 170003c894d93332e1b4162aa06864ad83eb302d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 26 Feb 2016 09:18:03 -0600 Subject: iw_cxgb4: remove port mapper related code Now that most of the port mapper code been moved to iwcm, we can remove it from iw_cxgb4. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index cd2ff5f..6151883 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -302,7 +302,7 @@ void _c4iw_free_ep(struct kref *kref) if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; cxgb4_clip_release( ep->com.dev->rdev.lldi.ports[0], @@ -314,12 +314,6 @@ void _c4iw_free_ep(struct kref *kref) dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); } - if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) { - print_addr(&ep->com, __func__, "remove_mapinfo/mapping"); - iwpm_remove_mapinfo(&ep->com.local_addr, - &ep->com.mapped_local_addr); - iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); - } kfree(ep); } @@ -455,7 +449,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) state_set(&ep->com, DEAD); if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = - (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + (struct sockaddr_in6 *)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } @@ -568,54 +562,6 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -/* - * c4iw_form_pm_msg - Form a port mapper message with mapping info - */ -static void c4iw_form_pm_msg(struct c4iw_ep *ep, - struct iwpm_sa_data *pm_msg) -{ - memcpy(&pm_msg->loc_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&pm_msg->rem_addr, &ep->com.remote_addr, - sizeof(ep->com.remote_addr)); -} - -/* - * c4iw_form_reg_msg - Form a port mapper message with dev info - */ -static void c4iw_form_reg_msg(struct c4iw_dev *dev, - struct iwpm_dev_data *pm_msg) -{ - memcpy(pm_msg->dev_name, dev->ibdev.name, 
IWPM_DEVNAME_SIZE); - memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name, - IWPM_IFNAME_SIZE); -} - -static void c4iw_record_pm_msg(struct c4iw_ep *ep, - struct iwpm_sa_data *pm_msg) -{ - memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr, - sizeof(ep->com.mapped_local_addr)); - memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr, - sizeof(ep->com.mapped_remote_addr)); -} - -static int get_remote_addr(struct c4iw_ep *parent_ep, struct c4iw_ep *child_ep) -{ - int ret; - - print_addr(&parent_ep->com, __func__, "get_remote_addr parent_ep "); - print_addr(&child_ep->com, __func__, "get_remote_addr child_ep "); - - ret = iwpm_get_remote_info(&parent_ep->com.mapped_local_addr, - &child_ep->com.mapped_remote_addr, - &child_ep->com.remote_addr, RDMA_NL_C4IW); - if (ret) - PDBG("Unable to find remote peer addr info - err %d\n", ret); - - return ret; -} - static void best_mtu(const unsigned short *mtus, unsigned short mtu, unsigned int *idx, int use_ts, int ipv6) { @@ -645,13 +591,13 @@ static int send_connect(struct c4iw_ep *ep) int wscale; int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; struct sockaddr_in *ra = (struct sockaddr_in *) - &ep->com.mapped_remote_addr; + &ep->com.remote_addr; struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) - &ep->com.mapped_remote_addr; + &ep->com.remote_addr; int ret; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; u32 isn = (prandom_u32() & ~7UL) - 1; @@ -1829,10 +1775,10 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) req->le.filter = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); - sin = (struct sockaddr_in *)&ep->com.mapped_local_addr; + sin = (struct sockaddr_in *)&ep->com.local_addr; req->le.lport = sin->sin_port; req->le.u.ipv4.lip = sin->sin_addr.s_addr; - sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + sin = (struct sockaddr_in *)&ep->com.remote_addr; req->le.pport = sin->sin_port; req->le.u.ipv4.pip = sin->sin_addr.s_addr; req->tcb.t_state_to_astid = @@ -2013,13 +1959,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep) { int err = 0; struct sockaddr_in *laddr = (struct sockaddr_in *) - &ep->com.cm_id->local_addr; + &ep->com.cm_id->m_local_addr; struct sockaddr_in *raddr = (struct sockaddr_in *) - &ep->com.cm_id->remote_addr; + &ep->com.cm_id->m_remote_addr; struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *) - &ep->com.cm_id->local_addr; + &ep->com.cm_id->m_local_addr; struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) - &ep->com.cm_id->remote_addr; + &ep->com.cm_id->m_remote_addr; int iptype; __u8 *ra; @@ -2038,7 +1984,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); /* find a route */ - if (ep->com.cm_id->local_addr.ss_family == AF_INET) { + if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, raddr->sin_addr.s_addr, laddr->sin_port, raddr->sin_port, 0); @@ -2109,10 +2055,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct sockaddr_in6 *ra6; ep = lookup_atid(t, atid); - la = (struct sockaddr_in *)&ep->com.mapped_local_addr; - ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr; - la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; - ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr; + la 
= (struct sockaddr_in *)&ep->com.local_addr; + ra = (struct sockaddr_in *)&ep->com.remote_addr; + la6 = (struct sockaddr_in6 *)&ep->com.local_addr; + ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); @@ -2154,7 +2100,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; cxgb4_clip_release( ep->com.dev->rdev.lldi.ports[0], (const u32 *) @@ -2189,7 +2135,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = - (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + (struct sockaddr_in6 *)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } @@ -2459,18 +2405,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) child_ep->com.dev = dev; child_ep->com.cm_id = NULL; - /* - * The mapped_local and mapped_remote addresses get setup with - * the actual 4-tuple. The local address will be based on the - * actual local address of the connection, but on the port number - * of the parent listening endpoint. The remote address is - * setup based on a query to the IWPM since we don't know what it - * originally was before mapping. If no mapping was done, then - * mapped_remote == remote, and mapped_local == local. - */ if (iptype == 4) { struct sockaddr_in *sin = (struct sockaddr_in *) - &child_ep->com.mapped_local_addr; + &child_ep->com.local_addr; sin->sin_family = PF_INET; sin->sin_port = local_port; @@ -2482,12 +2419,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) &parent_ep->com.local_addr)->sin_port; sin->sin_addr.s_addr = *(__be32 *)local_ip; - sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr; + sin = (struct sockaddr_in *)&child_ep->com.remote_addr; sin->sin_family = PF_INET; sin->sin_port = peer_port; sin->sin_addr.s_addr = *(__be32 *)peer_ip; } else { - sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr; + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; sin6->sin6_family = PF_INET6; sin6->sin6_port = local_port; memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); @@ -2498,14 +2435,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) &parent_ep->com.local_addr)->sin6_port; memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); - sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr; + sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; sin6->sin6_family = PF_INET6; sin6->sin6_port = peer_port; memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); } - memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr, - sizeof(child_ep->com.remote_addr)); - get_remote_addr(parent_ep, child_ep); c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; @@ -2522,7 +2456,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) accept_cr(child_ep, skb, req); set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); if (iptype == 6) { - sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr; + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } @@ -2765,7 +2699,7 @@ out: if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = (struct 
sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; cxgb4_clip_release( ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, @@ -3026,8 +2960,8 @@ static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) { struct in_device *ind; int found = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; ind = in_dev_get(dev->rdev.lldi.ports[0]); if (!ind) @@ -3072,8 +3006,8 @@ static int get_lladdr(struct net_device *dev, struct in6_addr *addr, static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) { struct in6_addr uninitialized_var(addr); - struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr; - struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr; + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr; if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { memcpy(la6->sin6_addr.s6_addr, &addr, 16); @@ -3092,11 +3026,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct sockaddr_in *raddr; struct sockaddr_in6 *laddr6; struct sockaddr_in6 *raddr6; - struct iwpm_dev_data pm_reg_msg; - struct iwpm_sa_data pm_msg; __u8 *ra; int iptype; - int iwpm_err = 0; if ((conn_param->ord > cur_max_read_depth(dev)) || (conn_param->ird > cur_max_read_depth(dev))) { @@ -3144,47 +3075,17 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } insert_handle(dev, &dev->atid_idr, ep, ep->atid); - memcpy(&ep->com.local_addr, &cm_id->local_addr, + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, sizeof(ep->com.remote_addr)); - /* No port mapper available, go with the specified peer information */ - memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, - sizeof(ep->com.mapped_local_addr)); - memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr, - sizeof(ep->com.mapped_remote_addr)); - - c4iw_form_reg_msg(dev, &pm_reg_msg); - iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); - if (iwpm_err) { - PDBG("%s: Port Mapper reg pid fail (err = %d).\n", - __func__, iwpm_err); - } - if (iwpm_valid_pid() && !iwpm_err) { - c4iw_form_pm_msg(ep, &pm_msg); - iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW); - if (iwpm_err) - PDBG("%s: Port Mapper query fail (err = %d).\n", - __func__, iwpm_err); - else - c4iw_record_pm_msg(ep, &pm_msg); - } - if (iwpm_create_mapinfo(&ep->com.local_addr, - &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { - iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); - err = -ENOMEM; - goto fail1; - } - print_addr(&ep->com, __func__, "add_query/create_mapinfo"); - set_bit(RELEASE_MAPINFO, &ep->com.flags); + laddr = (struct sockaddr_in *)&ep->com.local_addr; + raddr = (struct sockaddr_in *)&ep->com.remote_addr; + laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr; + raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr; - laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr; - raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr; - laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; - raddr6 = (struct sockaddr_in6 *) 
&ep->com.mapped_remote_addr; - - if (cm_id->remote_addr.ss_family == AF_INET) { + if (cm_id->m_remote_addr.ss_family == AF_INET) { iptype = 4; ra = (__u8 *)&raddr->sin_addr; @@ -3269,7 +3170,7 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) { int err; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) { err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], @@ -3302,7 +3203,7 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) { int err; struct sockaddr_in *sin = (struct sockaddr_in *) - &ep->com.mapped_local_addr; + &ep->com.local_addr; if (dev->rdev.lldi.enable_fw_ofld_conn) { do { @@ -3343,9 +3244,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; - struct iwpm_dev_data pm_reg_msg; - struct iwpm_sa_data pm_msg; - int iwpm_err = 0; might_sleep(); @@ -3360,7 +3258,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) ep->com.cm_id = cm_id; ep->com.dev = dev; ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->local_addr, + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); /* @@ -3369,10 +3267,10 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) if (dev->rdev.lldi.enable_fw_ofld_conn && ep->com.local_addr.ss_family == AF_INET) ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, - cm_id->local_addr.ss_family, ep); + cm_id->m_local_addr.ss_family, ep); else ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, - cm_id->local_addr.ss_family, ep); + cm_id->m_local_addr.ss_family, ep); if (ep->stid == -1) { printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); @@ -3381,36 +3279,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } insert_handle(dev, &dev->stid_idr, ep, ep->stid); - /* No port mapper available, go with the specified info */ - memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, - sizeof(ep->com.mapped_local_addr)); - - c4iw_form_reg_msg(dev, &pm_reg_msg); - iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); - if (iwpm_err) { - PDBG("%s: Port Mapper reg pid fail (err = %d).\n", - __func__, iwpm_err); - } - if (iwpm_valid_pid() && !iwpm_err) { - memcpy(&pm_msg.loc_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW); - if (iwpm_err) - PDBG("%s: Port Mapper query fail (err = %d).\n", - __func__, iwpm_err); - else - memcpy(&ep->com.mapped_local_addr, - &pm_msg.mapped_loc_addr, - sizeof(ep->com.mapped_local_addr)); - } - if (iwpm_create_mapinfo(&ep->com.local_addr, - &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { - err = -ENOMEM; - goto fail3; - } - print_addr(&ep->com, __func__, "add_mapping/create_mapinfo"); + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, + sizeof(ep->com.local_addr)); - set_bit(RELEASE_MAPINFO, &ep->com.flags); state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) err = create_server4(dev, ep); @@ -3421,7 +3292,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) goto out; } -fail3: cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); fail2: @@ -3456,7 +3326,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) goto done; err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, __func__); - sin6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + sin6 = (struct sockaddr_in6 
*)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } @@ -3580,7 +3450,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, state_set(&ep->com, DEAD); if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = - (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + (struct sockaddr_in6 *)&ep->com.local_addr; cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 8024ea4..2f884ca 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -87,17 +87,6 @@ struct c4iw_debugfs_data { int pos; }; -/* registered cxgb4 netlink callbacks */ -static struct ibnl_client_cbs c4iw_nl_cb_table[] = { - [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, - [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, - [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, - [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, - [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, - [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, - [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} -}; - static int count_idrs(int id, void *p, void *data) { int *countp = data; @@ -242,13 +231,13 @@ static int dump_qp(int id, void *p, void *data) if (qp->ep) { if (qp->ep->com.local_addr.ss_family == AF_INET) { struct sockaddr_in *lsin = (struct sockaddr_in *) - &qp->ep->com.local_addr; + &qp->ep->com.cm_id->local_addr; struct sockaddr_in *rsin = (struct sockaddr_in *) - &qp->ep->com.remote_addr; + &qp->ep->com.cm_id->remote_addr; struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) - &qp->ep->com.mapped_local_addr; + &qp->ep->com.cm_id->m_local_addr; struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) - &qp->ep->com.mapped_remote_addr; + &qp->ep->com.cm_id->m_remote_addr; cc = snprintf(qpd->buf + qpd->pos, space, "rc qp sq id %u rq id %u state %u " @@ -264,15 +253,15 @@ static int dump_qp(int id, void *p, void *data) ntohs(mapped_rsin->sin_port)); } else { struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) - &qp->ep->com.local_addr; + &qp->ep->com.cm_id->local_addr; struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) - &qp->ep->com.remote_addr; + &qp->ep->com.cm_id->remote_addr; struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) - &qp->ep->com.mapped_local_addr; + &qp->ep->com.cm_id->m_local_addr; struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *) - &qp->ep->com.mapped_remote_addr; + &qp->ep->com.cm_id->m_remote_addr; cc = snprintf(qpd->buf + qpd->pos, space, "rc qp sq id %u rq id %u state %u " @@ -545,13 +534,13 @@ static int dump_ep(int id, void *p, void *data) if (ep->com.local_addr.ss_family == AF_INET) { struct sockaddr_in *lsin = (struct sockaddr_in *) - &ep->com.local_addr; + &ep->com.cm_id->local_addr; struct sockaddr_in *rsin = (struct sockaddr_in *) - &ep->com.remote_addr; + &ep->com.cm_id->remote_addr; struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) - &ep->com.mapped_local_addr; + &ep->com.cm_id->m_local_addr; struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) - &ep->com.mapped_remote_addr; + &ep->com.cm_id->m_remote_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " @@ -569,13 +558,13 @@ static int dump_ep(int id, void *p, void *data) ntohs(mapped_rsin->sin_port)); } else { struct 
sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) - &ep->com.local_addr; + &ep->com.cm_id->local_addr; struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) - &ep->com.remote_addr; + &ep->com.cm_id->remote_addr; struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.cm_id->m_local_addr; struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *) - &ep->com.mapped_remote_addr; + &ep->com.cm_id->m_remote_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " @@ -610,9 +599,9 @@ static int dump_listen_ep(int id, void *p, void *data) if (ep->com.local_addr.ss_family == AF_INET) { struct sockaddr_in *lsin = (struct sockaddr_in *) - &ep->com.local_addr; + &ep->com.cm_id->local_addr; struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) - &ep->com.mapped_local_addr; + &ep->com.cm_id->m_local_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p state %d flags 0x%lx stid %d " @@ -623,9 +612,9 @@ static int dump_listen_ep(int id, void *p, void *data) ntohs(mapped_lsin->sin_port)); } else { struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) - &ep->com.local_addr; + &ep->com.cm_id->local_addr; struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) - &ep->com.mapped_local_addr; + &ep->com.cm_id->m_local_addr; cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p state %d flags 0x%lx stid %d " @@ -1506,20 +1495,6 @@ static int __init c4iw_init_module(void) printk(KERN_WARNING MOD "could not create debugfs entry, continuing\n"); - if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS, - c4iw_nl_cb_table)) - pr_err("%s[%u]: Failed to add netlink callback\n" - , __func__, __LINE__); - - err = iwpm_init(RDMA_NL_C4IW); - if (err) { - pr_err("port mapper initialization failed with %d\n", err); - ibnl_remove_client(RDMA_NL_C4IW); - c4iw_cm_term(); - debugfs_remove_recursive(c4iw_debugfs_root); - return err; - } - cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); return 0; @@ -1537,8 +1512,6 @@ static void __exit c4iw_exit_module(void) } mutex_unlock(&dev_mutex); cxgb4_unregister_uld(CXGB4_ULD_RDMA); - iwpm_exit(RDMA_NL_C4IW); - ibnl_remove_client(RDMA_NL_C4IW); c4iw_cm_term(); debugfs_remove_recursive(c4iw_debugfs_root); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index fb2de75..efb1d78 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -753,7 +753,6 @@ enum c4iw_ep_flags { CLOSE_SENT = 3, TIMEOUT = 4, QP_REFERENCED = 5, - RELEASE_MAPINFO = 6, }; enum c4iw_ep_history { @@ -790,8 +789,6 @@ struct c4iw_ep_common { struct mutex mutex; struct sockaddr_storage local_addr; struct sockaddr_storage remote_addr; - struct sockaddr_storage mapped_local_addr; - struct sockaddr_storage mapped_remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; unsigned long history; @@ -843,45 +840,6 @@ struct c4iw_ep { struct c4iw_ep_stats stats; }; -static inline void print_addr(struct c4iw_ep_common *epc, const char *func, - const char *msg) -{ - -#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr)) -#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port) -#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr)) -#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port) - - if (c4iw_debug) { - switch (epc->local_addr.ss_family) { - case AF_INET: - PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n", - func, msg, SINA(&epc->local_addr), - SINP(&epc->local_addr), - SINP(&epc->mapped_local_addr), - 
SINA(&epc->remote_addr), - SINP(&epc->remote_addr), - SINP(&epc->mapped_remote_addr)); - break; - case AF_INET6: - PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n", - func, msg, SIN6A(&epc->local_addr), - SIN6P(&epc->local_addr), - SIN6P(&epc->mapped_local_addr), - SIN6A(&epc->remote_addr), - SIN6P(&epc->remote_addr), - SIN6P(&epc->mapped_remote_addr)); - break; - default: - break; - } - } -#undef SINA -#undef SINP -#undef SIN6A -#undef SIN6P -} - static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) { return cm_id->provider_data; -- cgit v0.10.2 From c1340e8aa628d65bcb5c5b7e332bde8a17851ebf Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 26 Feb 2016 09:18:04 -0600 Subject: iw_cxgb3: support for iWARP port mapping Now with the new iWARP port mapping service in the iwcm, it is trivial to add cxgb3 support. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index f504ba7..d403231 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1877,7 +1877,7 @@ err: static int is_loopback_dst(struct iw_cm_id *cm_id) { struct net_device *dev; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); if (!dev) @@ -1892,10 +1892,10 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_ep *ep; struct rtable *rt; int err = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - if (cm_id->remote_addr.ss_family != PF_INET) { + if (cm_id->m_remote_addr.ss_family != PF_INET) { err = -ENOSYS; goto out; } @@ -1961,9 +1961,9 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) state_set(&ep->com, CONNECTING); ep->tos = IPTOS_LOWDELAY; - memcpy(&ep->com.local_addr, &cm_id->local_addr, + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, sizeof(ep->com.remote_addr)); /* send connect request to rnic */ @@ -1992,7 +1992,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) might_sleep(); - if (cm_id->local_addr.ss_family != PF_INET) { + if (cm_id->m_local_addr.ss_family != PF_INET) { err = -ENOSYS; goto fail1; } @@ -2008,7 +2008,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->local_addr, + memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); /* -- cgit v0.10.2 From dafb5587178afe8abf85f3ae91bbc88de9e54782 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Fri, 26 Feb 2016 09:18:05 -0600 Subject: iwpm: crash fix for large connections test During large connection test, there is a crash at wake_up() in the callback as waitq is not yet initialized. Callback can happen before iwpm_wait_complete_req() is called to initialize waitq. To resolve, using signaling semaphore instead of waitq. 
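To make the fix described above concrete, here is a minimal, self-contained sketch of the signalling pattern the patch adopts. The names are illustrative stand-ins, not the exact iwpm structures: the semaphore is created already held, so a netlink callback that fires before the waiter even runs simply releases it early, and down_timeout() then returns immediately instead of touching an uninitialized wait queue.

#include <linux/semaphore.h>

struct demo_request {			/* illustrative stand-in for the nlmsg request */
	struct semaphore sem;
	int err_code;
};

/* Create the request with the semaphore already held. */
static void demo_request_init(struct demo_request *req)
{
	req->err_code = 0;
	sema_init(&req->sem, 1);
	down(&req->sem);
}

/* Netlink callback path: record the result and signal completion. */
static void demo_request_complete(struct demo_request *req, int err)
{
	req->err_code = err;
	up(&req->sem);
}

/* Requester path: 0 on completion, -ETIME if no reply within 'timeout' jiffies. */
static int demo_request_wait(struct demo_request *req, long timeout)
{
	return down_timeout(&req->sem, timeout);
}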
Signed-off-by: Mustafa Ismail Reviewed-by: Tatyana E Nikolova Signed-off-by: Faisal Latif Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index c2b4ce6..43e3fa2 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -89,7 +89,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) if (ret) goto pid_query_error; ret = ibnl_put_attr(skb, nlh, IFNAMSIZ, - pm_msg->if_name, IWPM_NLA_REG_IF_NAME); + pm_msg->if_name, IWPM_NLA_REG_IF_NAME); if (ret) goto pid_query_error; ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE, @@ -394,7 +394,7 @@ register_pid_response_exit: /* always for found nlmsg_request */ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); barrier(); - wake_up(&nlmsg_request->waitq); + up(&nlmsg_request->sem); return 0; } EXPORT_SYMBOL(iwpm_register_pid_cb); @@ -463,7 +463,7 @@ add_mapping_response_exit: /* always for found request */ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); barrier(); - wake_up(&nlmsg_request->waitq); + up(&nlmsg_request->sem); return 0; } EXPORT_SYMBOL(iwpm_add_mapping_cb); @@ -555,7 +555,7 @@ query_mapping_response_exit: /* always for found request */ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); barrier(); - wake_up(&nlmsg_request->waitq); + up(&nlmsg_request->sem); return 0; } EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); @@ -749,7 +749,7 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) /* always for found request */ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); barrier(); - wake_up(&nlmsg_request->waitq); + up(&nlmsg_request->sem); return 0; } EXPORT_SYMBOL(iwpm_mapping_error_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 5fb089e..9b2bf2f 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -254,9 +254,9 @@ void iwpm_add_remote_info(struct iwpm_remote_info *rem_info) } int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, - struct sockaddr_storage *mapped_rem_addr, - struct sockaddr_storage *remote_addr, - u8 nl_client) + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, + u8 nl_client) { struct hlist_node *tmp_hlist_node; struct hlist_head *hash_bucket_head; @@ -322,6 +322,8 @@ struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, nlmsg_request->nl_client = nl_client; nlmsg_request->request_done = 0; nlmsg_request->err_code = 0; + sema_init(&nlmsg_request->sem, 1); + down(&nlmsg_request->sem); return nlmsg_request; } @@ -364,11 +366,9 @@ struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq) int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request) { int ret; - init_waitqueue_head(&nlmsg_request->waitq); - ret = wait_event_timeout(nlmsg_request->waitq, - (nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT); - if (!ret) { + ret = down_timeout(&nlmsg_request->sem, IWPM_NL_TIMEOUT); + if (ret) { ret = -EINVAL; pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n", __func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq); diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index b7b9e19..af1fc14 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -69,7 +69,7 @@ struct iwpm_nlmsg_request { u8 nl_client; u8 request_done; u16 err_code; - wait_queue_head_t waitq; + struct semaphore 
sem; struct kref kref; }; -- cgit v0.10.2 From 42d6ec19c918cb5bc6d14769e24240dce8f81687 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sat, 5 Mar 2016 08:49:24 -0800 Subject: IB/hfi1: Add the break statement that was removed in an earlier patch The break statement was unintentionally removed in this patch commit 41ca419abc0ca7ee65d765408cdc1a7fed2897a3 ("staging/rdma/hfi1: Remove hfi1 MR and hfi1 specific qp type") Reviewed-by: Mike Marciniszyn Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index 5d84981..aa53859 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -470,6 +470,7 @@ again: goto inv_err; do_write: if (wqe->length == 0) + break; if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, wqe->rdma_wr.remote_addr, wqe->rdma_wr.rkey, -- cgit v0.10.2 From bf640096e670a35e3a7ba1336216664f89a2bcf1 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sat, 5 Mar 2016 08:49:29 -0800 Subject: IB/hfi1: Move constant to the right in bitwise operations Implement changes recommended by the Coccinelle tool to move constant to the right in bitwise operations -bash-4.2$ make coccicheck MODE=report M=drivers/infiniband/hw/hfi1/ drivers/infiniband/hw/hfi1/pio.c:765:4-16: Move constant to right. drivers/infiniband/hw/hfi1/rc.c:2503:19-29: Move constant to right. drivers/infiniband/hw/hfi1/chip.c:9813:11-22: Move constant to right. drivers/infiniband/hw/hfi1/chip.c:14468:29-40: Move constant to right. Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 77996527..263c882 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -9810,7 +9810,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (do_transition) { ret = set_physical_link_state(dd, - PLS_OFFLINE | (rem_reason << 8)); + (rem_reason << 8) | PLS_OFFLINE); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, @@ -14465,8 +14465,8 @@ static void handle_temp_err(struct hfi1_devdata *dd) */ ppd->driver_link_ready = 0; ppd->link_enabled = 0; - set_physical_link_state(dd, PLS_OFFLINE | - (OPA_LINKDOWN_REASON_SMA_DISABLED << 8)); + set_physical_link_state(dd, (OPA_LINKDOWN_REASON_SMA_DISABLED << 8) | + PLS_OFFLINE); /* * Step 2: Shutdown LCB and 8051 * After shutdown, do not restore DC_CFG_RESET value. 
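For readers unfamiliar with the Coccinelle rule being applied in this patch, a tiny standalone illustration of the preferred operand ordering follows; the macro names are hypothetical, not hfi1 registers. The computed term goes on the left of the bitwise OR and the constant flag on the right, which is what the chip.c hunk above and the pio.c and rc.c hunks below do.

/* Hypothetical names; only the operand ordering is the point here. */
#define DEMO_PLS_OFFLINE	0x5u
#define DEMO_REASON_SHIFT	8

static inline unsigned int demo_link_state(unsigned int rem_reason)
{
	/* preferred: variable-derived value first, constant last */
	return (rem_reason << DEMO_REASON_SHIFT) | DEMO_PLS_OFFLINE;
}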
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index 361b43d..e888e21 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -762,8 +762,8 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, /* set the default partition key */ write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), - (DEFAULT_PKEY & - SC(CHECK_PARTITION_KEY_VALUE_MASK)) << + (SC(CHECK_PARTITION_KEY_VALUE_MASK) & + DEFAULT_PKEY) << SC(CHECK_PARTITION_KEY_VALUE_SHIFT)); /* per context type checks */ diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 1ce0e08..351f136 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -2500,7 +2500,7 @@ send_last: return; rnr_nak: - qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; + qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK; qp->r_ack_psn = qp->r_psn; /* Queue RNR NAK for later */ rc_defered_ack(rcd, qp); -- cgit v0.10.2 From 2b8b34a948d063c4e803105ec0a3e8d27bd97c19 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sat, 5 Mar 2016 08:49:34 -0800 Subject: IB/hfi1: Replace kmalloc and memcpy with a kmemdup This change was recommended by Coccinelle tool when I ran the command: -bash-4.2$ make coccicheck MODE=patch M=drivers/infiniband/hw/hfi1/ Reviewed-by: Jubin John Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/staging/rdma/hfi1/efivar.c index 5fe3924..3f014f9 100644 --- a/drivers/staging/rdma/hfi1/efivar.c +++ b/drivers/staging/rdma/hfi1/efivar.c @@ -125,13 +125,12 @@ static int read_efi_var(const char *name, unsigned long *size, * temporary buffer. Now allocate a correctly sized * buffer. */ - data = kmalloc(temp_size, GFP_KERNEL); + data = kmemdup(temp_buffer, temp_size, GFP_KERNEL); if (!data) { ret = -ENOMEM; goto fail; } - memcpy(data, temp_buffer, temp_size); *size = temp_size; *return_data = data; -- cgit v0.10.2 From 3afb6f637e8edd68cee35b5c432c176f57fd712f Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:49:39 -0800 Subject: IB/hfi1: Remove ASIC block clear The ASIC block is shared between two HFIs. Individual devices should not initialize registers there. Retain the power-on values. Individual users set registers as needed with one exception. Clear sbus fast mode on "slow" calls. Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 263c882..0874287 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -12934,91 +12934,6 @@ static void reset_cce_csrs(struct hfi1_devdata *dd) write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0); } -/* set ASIC CSRs to chip reset defaults */ -static void reset_asic_csrs(struct hfi1_devdata *dd) -{ - int i; - - /* - * If the HFIs are shared between separate nodes or VMs, - * then more will need to be done here. One idea is a module - * parameter that returns early, letting the first power-on or - * a known first load do the reset and blocking all others. - */ - - if (!(dd->flags & HFI1_DO_INIT_ASIC)) - return; - - if (dd->icode != ICODE_FPGA_EMULATION) { - /* emulation does not have an SBus - leave these alone */ - /* - * All writes to ASIC_CFG_SBUS_REQUEST do something. - * Notes: - * o The reset is not zero if aimed at the core. 
See the - * SBus documentation for details. - * o If the SBus firmware has been updated (e.g. by the BIOS), - * will the reset revert that? - */ - /* ASIC_CFG_SBUS_REQUEST leave alone */ - write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0); - } - /* ASIC_SBUS_RESULT read-only */ - write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0); - for (i = 0; i < ASIC_NUM_SCRATCH; i++) - write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0); - write_csr(dd, ASIC_CFG_MUTEX, 0); /* this will clear it */ - - /* We might want to retain this state across FLR if we ever use it */ - write_csr(dd, ASIC_CFG_DRV_STR, 0); - - /* ASIC_CFG_THERM_POLL_EN leave alone */ - /* ASIC_STS_THERM read-only */ - /* ASIC_CFG_RESET leave alone */ - - write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0); - /* ASIC_PCIE_SD_HOST_STATUS read-only */ - write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0); - write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0); - /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */ - write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */ - /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */ - /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */ - for (i = 0; i < 16; i++) - write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0); - - /* ASIC_GPIO_IN read-only */ - write_csr(dd, ASIC_GPIO_OE, 0); - write_csr(dd, ASIC_GPIO_INVERT, 0); - write_csr(dd, ASIC_GPIO_OUT, 0); - write_csr(dd, ASIC_GPIO_MASK, 0); - /* ASIC_GPIO_STATUS read-only */ - write_csr(dd, ASIC_GPIO_CLEAR, ~0ull); - /* ASIC_GPIO_FORCE leave alone */ - - /* ASIC_QSFP1_IN read-only */ - write_csr(dd, ASIC_QSFP1_OE, 0); - write_csr(dd, ASIC_QSFP1_INVERT, 0); - write_csr(dd, ASIC_QSFP1_OUT, 0); - write_csr(dd, ASIC_QSFP1_MASK, 0); - /* ASIC_QSFP1_STATUS read-only */ - write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull); - /* ASIC_QSFP1_FORCE leave alone */ - - /* ASIC_QSFP2_IN read-only */ - write_csr(dd, ASIC_QSFP2_OE, 0); - write_csr(dd, ASIC_QSFP2_INVERT, 0); - write_csr(dd, ASIC_QSFP2_OUT, 0); - write_csr(dd, ASIC_QSFP2_MASK, 0); - /* ASIC_QSFP2_STATUS read-only */ - write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull); - /* ASIC_QSFP2_FORCE leave alone */ - - write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR); - /* this also writes a NOP command, clearing paging mode */ - write_csr(dd, ASIC_EEP_ADDR_CMD, 0); - write_csr(dd, ASIC_EEP_DATA, 0); -} - /* set MISC CSRs to chip reset defaults */ static void reset_misc_csrs(struct hfi1_devdata *dd) { @@ -13428,14 +13343,11 @@ static void init_chip(struct hfi1_devdata *dd) hfi1_pcie_flr(dd); restore_pci_variables(dd); } - - reset_asic_csrs(dd); } else { dd_dev_info(dd, "Resetting CSRs with writes\n"); reset_cce_csrs(dd); reset_txe_csrs(dd); reset_rxe_csrs(dd); - reset_asic_csrs(dd); reset_misc_csrs(dd); } /* clear the DC reset */ diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index d5befd1..ca4e489 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -1170,6 +1170,9 @@ int sbus_request_slow(struct hfi1_devdata *dd, { u64 reg, count = 0; + /* make sure fast mode is clear */ + clear_sbus_fast_mode(dd); + sbus_request(dd, receiver_addr, data_addr, command, data_in); write_csr(dd, ASIC_CFG_SBUS_EXECUTE, ASIC_CFG_SBUS_EXECUTE_EXECUTE_SMASK); -- cgit v0.10.2 From 78eb129d47f553e6f0607c393ebf4e9851edd73e Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:49:45 -0800 Subject: IB/hfi1: Add shared ASIC structure Create a shared structure to exist between devices that share the same ASIC. 
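In picture form, once both devices have probed (the field names are from the patch; the diagram itself is only illustrative):

	/*
	 *   hfi1_devdata (HFI0) ---+                      +--- dds[0]
	 *                          +--> hfi1_asic_data ---+
	 *   hfi1_devdata (HFI1) ---+                      +--- dds[1]
	 *
	 * Whichever HFI probes first allocates the shared block; the second
	 * finds its peer under hfi1_devs_lock and simply reuses
	 * peer->asic_data, leaving back-pointers to both devices in dds[].
	 */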
Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 0874287..686cadf 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13800,15 +13800,20 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd) ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT)) /* + * Information can be shared between the two HFIs on the same ASIC + * in the same OS. This function finds the peer device and sets + * up a shared structure. + * * Certain chip functions need to be initialized only once per asic * instead of per-device. This function finds the peer device and * checks whether that chip initialization needs to be done by this * device. */ -static void asic_should_init(struct hfi1_devdata *dd) +static int init_asic_data(struct hfi1_devdata *dd) { unsigned long flags; struct hfi1_devdata *tmp, *peer = NULL; + int ret = 0; spin_lock_irqsave(&hfi1_devs_lock, flags); /* Find our peer device */ @@ -13826,7 +13831,22 @@ static void asic_should_init(struct hfi1_devdata *dd) */ if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC)) dd->flags |= HFI1_DO_INIT_ASIC; + + if (peer) { + dd->asic_data = peer->asic_data; + } else { + dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL); + if (!dd->asic_data) { + ret = -ENOMEM; + goto done; + } + mutex_init(&dd->asic_data->asic_resource_mutex); + } + dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ + +done: spin_unlock_irqrestore(&hfi1_devs_lock, flags); + return ret; } /* @@ -14076,8 +14096,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* needs to be done before we look for the peer device */ read_guid(dd); - /* should this device init the ASIC block? */ - asic_should_init(dd); + /* set up shared ASIC data with peer device */ + ret = init_asic_data(dd); + if (ret) + goto bail_cleanup; /* obtain chip sizes, reset chip CSRs */ init_chip(dd); diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 9215482..e71a1c2 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -805,6 +805,12 @@ struct hfi1_temp { u8 triggers; /* temperature triggers */ }; +/* common data between shared ASIC HFIs */ +struct hfi1_asic_data { + struct hfi1_devdata *dds[2]; /* back pointers */ + struct mutex asic_resource_mutex; +}; + /* device data struct now contains only "general per-device" info. * fields related to a physical IB port are in a hfi1_pportdata struct. */ @@ -880,6 +886,9 @@ struct hfi1_devdata { wait_queue_head_t sdma_unfreeze_wq; atomic_t sdma_unfreeze_count; + /* common data between shared ASIC HFIs in this OS */ + struct hfi1_asic_data *asic_data; + /* hfi1_pportdata, points to array of (physical) port-specific * data structs, indexed by pidx (0..n-1) */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 37b3ce8..260a8e1 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -974,6 +974,25 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd); } +/* + * Release our hold on the shared asic data. If we are the last one, + * free the structure. Must be holding hfi1_devs_lock. + */ +static void release_asic_data(struct hfi1_devdata *dd) +{ + int other; + + if (!dd->asic_data) + return; + dd->asic_data->dds[dd->hfi1_id] = NULL; + other = dd->hfi1_id ? 
0 : 1; + if (!dd->asic_data->dds[other]) { + /* we are the last holder, free it */ + kfree(dd->asic_data); + } + dd->asic_data = NULL; +} + void hfi1_free_devdata(struct hfi1_devdata *dd) { unsigned long flags; @@ -981,6 +1000,7 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) spin_lock_irqsave(&hfi1_devs_lock, flags); idr_remove(&hfi1_unit_table, dd->unit); list_del(&dd->list); + release_asic_data(dd); spin_unlock_irqrestore(&hfi1_devs_lock, flags); free_platform_config(dd); rcu_barrier(); /* wait for rcu callbacks to complete */ -- cgit v0.10.2 From a2ee27a4552505db5967630abcc3a90340e0d824 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:49:50 -0800 Subject: IB/hfi1: Add ASIC resource reservation functions The ASIC block is a shared hardware resource between two devices on the chip. Add functions to acquire and release these resources in a way that is safe for both multiple users on the same OS and multiple users on different OSes, while holding the hardware mutex as little as possible. Reservations are noted in a scratch register in the shared region. There are two types of reservations: per-HFI dynamic and permanent. Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 686cadf..98ebee4 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13368,6 +13368,7 @@ static void init_chip(struct hfi1_devdata *dd) */ write_csr(dd, ASIC_QSFP1_OUT, 0x1f); write_csr(dd, ASIC_QSFP2_OUT, 0x1f); + init_chip_resources(dd); } static void init_early_variables(struct hfi1_devdata *dd) @@ -13794,6 +13795,7 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd) free_cntrs(dd); free_rcverr(dd); clean_up_interrupts(dd); + finish_chip_resources(dd); } #define HFI_BASE_GUID(dev) \ diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index e9a41ed..dc684bc 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -639,6 +639,36 @@ int load_firmware(struct hfi1_devdata *dd); void dispose_firmware(void); int acquire_hw_mutex(struct hfi1_devdata *dd); void release_hw_mutex(struct hfi1_devdata *dd); + +/* + * Bitmask of dynamic access for ASIC block chip resources. Each HFI has its + * own range of bits for the resource so it can clear its own bits on + * starting and exiting. If either HFI has the resource bit set, the + * resource is in use. The separate bit ranges are: + * HFI0 bits 7:0 + * HFI1 bits 15:8 + */ +#define CR_SBUS 0x01 /* SBUS, THERM, and PCIE registers */ +#define CR_EPROM 0x02 /* EEP, GPIO registers */ +#define CR_I2C1 0x04 /* QSFP1_OE register */ +#define CR_I2C2 0x08 /* QSFP2_OE register */ +#define CR_DYN_SHIFT 8 /* dynamic flag shift */ +#define CR_DYN_MASK ((1ull << CR_DYN_SHIFT) - 1) + +/* + * Bitmask of static ASIC states these are outside of the dynamic ASIC + * block chip resources above. These are to be set once and never cleared. + * Must be holding the SBus dynamic flag when setting. 
+ */ +#define CR_THERM_INIT 0x010000 + +int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait); +void release_chip_resource(struct hfi1_devdata *dd, u32 resource); +bool check_chip_resource(struct hfi1_devdata *dd, u32 resource, + const char *func); +void init_chip_resources(struct hfi1_devdata *dd); +void finish_chip_resources(struct hfi1_devdata *dd); + void fabric_serdes_reset(struct hfi1_devdata *dd); int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index ca4e489..140dd86 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -1385,6 +1385,193 @@ void release_hw_mutex(struct hfi1_devdata *dd) write_csr(dd, ASIC_CFG_MUTEX, 0); } +/* return the given resource bit(s) as a mask for the given HFI */ +static inline u64 resource_mask(u32 hfi1_id, u32 resource) +{ + return ((u64)resource) << (hfi1_id ? CR_DYN_SHIFT : 0); +} + +static void fail_mutex_acquire_message(struct hfi1_devdata *dd, + const char *func) +{ + dd_dev_err(dd, + "%s: hardware mutex stuck - suggest rebooting the machine\n", + func); +} + +/* + * Acquire access to a chip resource. + * + * Return 0 on success, -EBUSY if resource busy, -EIO if mutex acquire failed. + */ +static int __acquire_chip_resource(struct hfi1_devdata *dd, u32 resource) +{ + u64 scratch0, all_bits, my_bit; + int ret; + + if (resource & CR_DYN_MASK) { + /* a dynamic resource is in use if either HFI has set the bit */ + all_bits = resource_mask(0, resource) | + resource_mask(1, resource); + my_bit = resource_mask(dd->hfi1_id, resource); + } else { + /* non-dynamic resources are not split between HFIs */ + all_bits = resource; + my_bit = resource; + } + + /* lock against other callers within the driver wanting a resource */ + mutex_lock(&dd->asic_data->asic_resource_mutex); + + ret = acquire_hw_mutex(dd); + if (ret) { + fail_mutex_acquire_message(dd, __func__); + ret = -EIO; + goto done; + } + + scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); + if (scratch0 & all_bits) { + ret = -EBUSY; + } else { + write_csr(dd, ASIC_CFG_SCRATCH, scratch0 | my_bit); + /* force write to be visible to other HFI on another OS */ + (void)read_csr(dd, ASIC_CFG_SCRATCH); + } + + release_hw_mutex(dd); + +done: + mutex_unlock(&dd->asic_data->asic_resource_mutex); + return ret; +} + +/* + * Acquire access to a chip resource, wait up to mswait milliseconds for + * the resource to become available. + * + * Return 0 on success, -EBUSY if busy (even after wait), -EIO if mutex + * acquire failed. 
+ */ +int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait) +{ + unsigned long timeout; + int ret; + + timeout = jiffies + msecs_to_jiffies(mswait); + while (1) { + ret = __acquire_chip_resource(dd, resource); + if (ret != -EBUSY) + return ret; + /* resource is busy, check our timeout */ + if (time_after_eq(jiffies, timeout)) + return -EBUSY; + usleep_range(80, 120); /* arbitrary delay */ + } +} + +/* + * Release access to a chip resource + */ +void release_chip_resource(struct hfi1_devdata *dd, u32 resource) +{ + u64 scratch0, bit; + + /* only dynamic resources should ever be cleared */ + if (!(resource & CR_DYN_MASK)) { + dd_dev_err(dd, "%s: invalid resource 0x%x\n", __func__, + resource); + return; + } + bit = resource_mask(dd->hfi1_id, resource); + + /* lock against other callers within the driver wanting a resource */ + mutex_lock(&dd->asic_data->asic_resource_mutex); + + if (acquire_hw_mutex(dd)) { + fail_mutex_acquire_message(dd, __func__); + goto done; + } + + scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); + if ((scratch0 & bit) != 0) { + scratch0 &= ~bit; + write_csr(dd, ASIC_CFG_SCRATCH, scratch0); + /* force write to be visible to other HFI on another OS */ + (void)read_csr(dd, ASIC_CFG_SCRATCH); + } else { + dd_dev_warn(dd, "%s: id %d, resource 0x%x: bit not set\n", + __func__, dd->hfi1_id, resource); + } + + release_hw_mutex(dd); + +done: + mutex_unlock(&dd->asic_data->asic_resource_mutex); +} + +/* + * Return true if resource is set, false otherwise. Print a warning + * if not set and a function is supplied. + */ +bool check_chip_resource(struct hfi1_devdata *dd, u32 resource, + const char *func) +{ + u64 scratch0, bit; + + if (resource & CR_DYN_MASK) + bit = resource_mask(dd->hfi1_id, resource); + else + bit = resource; + + scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); + if ((scratch0 & bit) == 0) { + if (func) + dd_dev_warn(dd, + "%s: id %d, resource 0x%x, not acquired!\n", + func, dd->hfi1_id, resource); + return false; + } + return true; +} + +static void clear_chip_resources(struct hfi1_devdata *dd, const char *func) +{ + u64 scratch0; + + /* lock against other callers within the driver wanting a resource */ + mutex_lock(&dd->asic_data->asic_resource_mutex); + + if (acquire_hw_mutex(dd)) { + fail_mutex_acquire_message(dd, func); + goto done; + } + + /* clear all dynamic access bits for this HFI */ + scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); + scratch0 &= ~resource_mask(dd->hfi1_id, CR_DYN_MASK); + write_csr(dd, ASIC_CFG_SCRATCH, scratch0); + /* force write to be visible to other HFI on another OS */ + (void)read_csr(dd, ASIC_CFG_SCRATCH); + + release_hw_mutex(dd); + +done: + mutex_unlock(&dd->asic_data->asic_resource_mutex); +} + +void init_chip_resources(struct hfi1_devdata *dd) +{ + /* clear any holds left by us */ + clear_chip_resources(dd, __func__); +} + +void finish_chip_resources(struct hfi1_devdata *dd) +{ + /* clear any holds left by us */ + clear_chip_resources(dd, __func__); +} + void set_sbus_fast_mode(struct hfi1_devdata *dd) { write_csr(dd, ASIC_CFG_SBUS_EXECUTE, -- cgit v0.10.2 From 60c708285c0d3877a78dd8ede24238bc68c09651 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:49:55 -0800 Subject: IB/hfi1: Change EPROM handling to use resource reservation Change EPROM handling to use the new ASIC resource reservation system. 
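The conversion that follows (and the SBus, QSFP and thermal conversions after it) all reduce to the same idiom: bracket the hardware access with acquire_chip_resource()/release_chip_resource() instead of holding the hardware mutex for the whole operation. A minimal sketch of that idiom, using the CR_EPROM resource and EPROM_TIMEOUT from this patch; eprom_do_work() is a made-up placeholder, not a driver function:

	static int eprom_do_work(struct hfi1_devdata *dd)
	{
		int ret;

		/* wait up to EPROM_TIMEOUT ms for the other HFI to release it */
		ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
		if (ret)
			return ret;	/* -EBUSY if still held, -EIO if the hw mutex failed */

		/* ... access the EPROM (EEP/GPIO) registers here ... */

		release_chip_resource(dd, CR_EPROM);
		return 0;
	}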
Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c index 87114af..bd87715 100644 --- a/drivers/staging/rdma/hfi1/eprom.c +++ b/drivers/staging/rdma/hfi1/eprom.c @@ -102,9 +102,11 @@ #define EPROM_WP_N BIT_ULL(14) /* EPROM write line */ /* - * Use the EP mutex to guard against other callers from within the driver. + * How long to wait for the EPROM to become available, in ms. + * The spec 32 Mb EPROM takes around 40s to erase then write. + * Double it for safety. */ -static DEFINE_MUTEX(eprom_mutex); +#define EPROM_TIMEOUT 80000 /* ms */ /* * Turn on external enable line that allows writing on the flash. @@ -371,14 +373,9 @@ int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) if (!dd->eprom_available) return -EOPNOTSUPP; - /* lock against other callers touching the ASIC block */ - mutex_lock(&eprom_mutex); - - /* lock against the other HFI on another OS */ - ret = acquire_hw_mutex(dd); + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); if (ret) { - dd_dev_err(dd, - "%s: unable to acquire hw mutex, no EPROM support\n", + dd_dev_err(dd, "%s: unable to acquire EPROM resource\n", __func__); goto done_asic; } @@ -428,9 +425,8 @@ int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) break; } - release_hw_mutex(dd); + release_chip_resource(dd, CR_EPROM); done_asic: - mutex_unlock(&eprom_mutex); return ret; } @@ -441,23 +437,18 @@ int eprom_init(struct hfi1_devdata *dd) { int ret = 0; - /* only the discrete chip has an EPROM, nothing to do */ + /* only the discrete chip has an EPROM */ if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0) return 0; - /* lock against other callers */ - mutex_lock(&eprom_mutex); - /* - * Lock against the other HFI on another OS - the mutex above - * would have caught anything in this driver. It is OK if - * both OSes reset the EPROM - as long as they don't do it at - * the same time. + * It is OK if both HFIs reset the EPROM as long as they don't + * do it at the same time. */ - ret = acquire_hw_mutex(dd); + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); if (ret) { dd_dev_err(dd, - "%s: unable to acquire hw mutex, no EPROM support\n", + "%s: unable to acquire EPROM resource, no EPROM support\n", __func__); goto done_asic; } @@ -474,8 +465,7 @@ int eprom_init(struct hfi1_devdata *dd) write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); dd->eprom_available = true; - release_hw_mutex(dd); + release_chip_resource(dd, CR_EPROM); done_asic: - mutex_unlock(&eprom_mutex); return ret; } -- cgit v0.10.2 From 576531fde8473333322905ea09fd5cfd14ce91ef Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:50:01 -0800 Subject: IB/hfi1: Change SBus handling to use resource reservation The SBus resource includes SBUS, PCIE, and THERM registers. Change SBus handling to use the new ASIC resource reservation system. 
Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 98ebee4..269c977 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -14324,7 +14324,12 @@ static int thermal_init(struct hfi1_devdata *dd) !(dd->flags & HFI1_DO_INIT_ASIC)) return ret; - acquire_hw_mutex(dd); + ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT); + if (ret) { + THERM_FAILURE(dd, ret, "Acquire SBus"); + return ret; + } + dd_dev_info(dd, "Initializing thermal sensor\n"); /* Disable polling of thermal readings */ write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x0); @@ -14372,7 +14377,7 @@ static int thermal_init(struct hfi1_devdata *dd) /* Enable polling of thermal readings */ write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1); done: - release_hw_mutex(dd); + release_chip_resource(dd, CR_SBUS); return ret; } diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index dc684bc..311e6e8 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -669,6 +669,9 @@ bool check_chip_resource(struct hfi1_devdata *dd, u32 resource, void init_chip_resources(struct hfi1_devdata *dd); void finish_chip_resources(struct hfi1_devdata *dd); +/* ms wait time for access to an SBus resoure */ +#define SBUS_TIMEOUT 4000 /* long enough for a FW download and SBR */ + void fabric_serdes_reset(struct hfi1_devdata *dd); int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 140dd86..1ea1ad8 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -1125,15 +1125,23 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags) */ void fabric_serdes_reset(struct hfi1_devdata *dd) { + int ret; + if (!fw_fabric_serdes_load) return; + ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT); + if (ret) { + dd_dev_err(dd, + "Cannot acquire SBus resource to reset fabric SerDes - perhaps you should reboot\n"); + return; + } + set_sbus_fast_mode(dd); + if (is_ax(dd)) { /* A0 serdes do not work with a re-download */ u8 ra = fabric_serdes_broadcast[dd->hfi1_id]; - acquire_hw_mutex(dd); - set_sbus_fast_mode(dd); /* place SerDes in reset and disable SPICO */ sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011); /* wait 100 refclk cycles @ 156.25MHz => 640ns */ @@ -1142,26 +1150,20 @@ void fabric_serdes_reset(struct hfi1_devdata *dd) sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010); /* turn SPICO enable on */ sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002); - clear_sbus_fast_mode(dd); - release_hw_mutex(dd); - return; + } else { + turn_off_spicos(dd, SPICO_FABRIC); + /* + * No need for firmware retry - what to download has already + * been decided. + * No need to pay attention to the load return - the only + * failure is a validation failure, which has already been + * checked by the initial download. + */ + (void)load_fabric_serdes_firmware(dd, &fw_fabric); } - acquire_hw_mutex(dd); - set_sbus_fast_mode(dd); - - turn_off_spicos(dd, SPICO_FABRIC); - /* - * No need for firmware retry - what to download has already been - * decided. - * No need to pay attention to the load return - the only failure - * is a validation failure, which has already been checked by the - * initial download. 
- */ - (void)load_fabric_serdes_firmware(dd, &fw_fabric); - clear_sbus_fast_mode(dd); - release_hw_mutex(dd); + release_chip_resource(dd, CR_SBUS); } /* Access to the SBus in this routine should probably be serialized */ @@ -1598,7 +1600,7 @@ int load_firmware(struct hfi1_devdata *dd) int ret; if (fw_fabric_serdes_load) { - ret = acquire_hw_mutex(dd); + ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT); if (ret) return ret; @@ -1614,7 +1616,7 @@ int load_firmware(struct hfi1_devdata *dd) } while (retry_firmware(dd, ret)); clear_sbus_fast_mode(dd); - release_hw_mutex(dd); + release_chip_resource(dd, CR_SBUS); if (ret) return ret; } @@ -1995,7 +1997,7 @@ int get_platform_config_field(struct hfi1_devdata *dd, * Download the firmware needed for the Gen3 PCIe SerDes. An update * to the SBus firmware is needed before updating the PCIe firmware. * - * Note: caller must be holding the HW mutex. + * Note: caller must be holding the SBus resource. */ int load_pcie_firmware(struct hfi1_devdata *dd) { diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c index 1adfa8b..42a409f 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/staging/rdma/hfi1/pcie.c @@ -773,7 +773,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs, /* * Steps to be done after the PCIe firmware is downloaded and * before the SBR for the Pcie Gen3. - * The hardware mutex is already being held. + * The SBus resource is already being held. */ static void pcie_post_steps(struct hfi1_devdata *dd) { @@ -1012,10 +1012,13 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) goto done_no_mutex; } - /* hold the HW mutex across the firmware download and SBR */ - ret = acquire_hw_mutex(dd); - if (ret) + /* hold the SBus resource across the firmware download and SBR */ + ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT); + if (ret) { + dd_dev_err(dd, "%s: unable to acquire SBus resource\n", + __func__); return ret; + } /* make sure thermal polling is not causing interrupts */ therm = read_csr(dd, ASIC_CFG_THERM_POLL_EN); @@ -1324,7 +1327,7 @@ done: dd_dev_info(dd, "%s: Re-enable therm polling\n", __func__); } - release_hw_mutex(dd); + release_chip_resource(dd, CR_SBUS); done_no_mutex: /* return no error if it is OK to be at current speed */ if (ret && !return_error) { -- cgit v0.10.2 From 765a6fac9132da203347525032bb40b1e9055104 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:50:06 -0800 Subject: IB/hfi1: Change QSFP functions to use resource reservation Remove the mutex guarding each operation in favor the ASIC resource acquire/release. Push the resource acquire/release, above each operation call to allow exclusive access across multiple operations. 
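Concretely, qsfp_read()/qsfp_write() (and i2c_read()/i2c_write()) now only check that the caller already holds the i2c chain resource, while the new one_qsfp_read()/one_qsfp_write() wrappers acquire and release it for stand-alone accesses. A rough sketch of the two calling styles; qsfp_example_sequence() is a hypothetical caller, while the helpers, qsfp_resource() and QSFP_WAIT are from this patch:

	static int qsfp_example_sequence(struct hfi1_pportdata *ppd, u8 *buf, int len)
	{
		struct hfi1_devdata *dd = ppd->dd;
		int ret;

		/* stand-alone access: the wrapper takes and drops the resource itself */
		ret = one_qsfp_read(ppd, dd->hfi1_id, 0, buf, len);
		if (ret != len)
			return ret < 0 ? ret : -EIO;

		/* multi-operation sequence: hold the resource across all of it */
		ret = acquire_chip_resource(dd, qsfp_resource(dd), QSFP_WAIT);
		if (ret)
			return ret;
		ret = qsfp_read(ppd, dd->hfi1_id, 0, buf, len);		/* lower page 00H */
		if (ret == len)
			ret = qsfp_read(ppd, dd->hfi1_id, 256, buf, len);	/* upper page 01H */
		release_chip_resource(dd, qsfp_resource(dd));
		if (ret < 0)
			return ret;
		return ret == len ? 0 : -EIO;
	}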
Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 269c977..d3a9b9f 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -6267,8 +6267,8 @@ void handle_8051_request(struct work_struct *work) cdr_ctrl_byte &= ~(1 << i); } } - qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS, - &cdr_ctrl_byte, 1); + one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS, + &cdr_ctrl_byte, 1); hreq_response(dd, HREQ_SUCCESS, data); refresh_qsfp_cache(ppd, &ppd->qsfp_info); break; @@ -9290,8 +9290,8 @@ void qsfp_event(struct work_struct *work) if (qd->check_interrupt_flags) { u8 qsfp_interrupt_status[16] = {0,}; - if (qsfp_read(ppd, dd->hfi1_id, 6, - &qsfp_interrupt_status[0], 16) != 16) { + if (one_qsfp_read(ppd, dd->hfi1_id, 6, + &qsfp_interrupt_status[0], 16) != 16) { dd_dev_info(dd, "%s: Failed to read status of QSFP module\n", __func__); @@ -9845,7 +9845,17 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (ppd->port_type == PORT_TYPE_QSFP && ppd->qsfp_info.limiting_active && qsfp_mod_present(ppd)) { - set_qsfp_tx(ppd, 0); + int ret; + + ret = acquire_chip_resource(dd, qsfp_resource(dd), QSFP_WAIT); + if (ret == 0) { + set_qsfp_tx(ppd, 0); + release_chip_resource(dd, qsfp_resource(dd)); + } else { + /* not fatal, but should warn */ + dd_dev_err(dd, + "Unable to acquire lock to turn off QSFP TX\n"); + } } /* diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 311e6e8..9313963 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -672,6 +672,9 @@ void finish_chip_resources(struct hfi1_devdata *dd); /* ms wait time for access to an SBus resoure */ #define SBUS_TIMEOUT 4000 /* long enough for a FW download and SBR */ +/* ms wait time for a qsfp (i2c) chain to become available */ +#define QSFP_WAIT 20000 /* long enough for FW update to the F4 uc */ + void fabric_serdes_reset(struct hfi1_devdata *dd); int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index 99845bc..665666c 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -465,16 +465,22 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, goto _free; } + ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); + if (ret) + goto _free; + total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count); if (total_written < 0) { ret = total_written; - goto _free; + goto _release; } *ppos += total_written; ret = total_written; + _release: + release_chip_resource(ppd->dd, i2c_target(target)); _free: kfree(buff); _return: @@ -526,10 +532,14 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, goto _return; } + ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); + if (ret) + goto _free; + total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count); if (total_read < 0) { ret = total_read; - goto _free; + goto _release; } *ppos += total_read; @@ -537,11 +547,13 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, ret = copy_to_user(buf, buff, total_read); if (ret > 0) { ret = -EFAULT; - goto _free; + goto _release; } ret = total_read; + _release: + release_chip_resource(ppd->dd, i2c_target(target)); _free: 
kfree(buff); _return: @@ -592,7 +604,7 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, goto _free; } - total_written = qsfp_write(ppd, target, *ppos, buff, count); + total_written = one_qsfp_write(ppd, target, *ppos, buff, count); if (total_written < 0) { ret = total_written; goto _free; @@ -646,7 +658,7 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, goto _return; } - total_read = qsfp_read(ppd, target, *ppos, buff, count); + total_read = one_qsfp_read(ppd, target, *ppos, buff, count); if (total_read < 0) { ret = total_read; goto _free; diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index e71a1c2..108015c 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1048,8 +1048,6 @@ struct hfi1_devdata { struct platform_config platform_config; struct platform_config_cache pcfg_cache; - /* control high-level access to qsfp */ - struct mutex qsfp_i2c_mutex; struct diag_client *diag_client; spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ @@ -1938,6 +1936,18 @@ static inline void setextled(struct hfi1_devdata *dd, u32 on) write_csr(dd, DCC_CFG_LED_CNTRL, 0x10); } +/* return the i2c resource given the target */ +static inline u32 i2c_target(u32 target) +{ + return target ? CR_I2C2 : CR_I2C1; +} + +/* return the i2c chain chip resource that this HFI uses for QSFP */ +static inline u32 qsfp_resource(struct hfi1_devdata *dd) +{ + return i2c_target(dd->hfi1_id); +} + int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 260a8e1..f21933c 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1065,7 +1065,6 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) spin_lock_init(&dd->sc_init_lock); spin_lock_init(&dd->dc8051_lock); spin_lock_init(&dd->dc8051_memlock); - mutex_init(&dd->qsfp_i2c_mutex); seqlock_init(&dd->sc2vl_lock); spin_lock_init(&dd->sde_map_lock); spin_lock_init(&dd->pio_map_lock); diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index 4777414..0a1d074 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -601,23 +601,30 @@ static void apply_tunings( static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, u32 *ptr_rx_preset, u32 *ptr_total_atten) { - int ret = 0; + int ret; u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled; u8 *cache = ppd->qsfp_info.cache; + ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT); + if (ret) { + dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n", + __func__, (int)ppd->dd->hfi1_id); + return ret; + } + ppd->qsfp_info.limiting_active = 1; ret = set_qsfp_tx(ppd, 0); if (ret) - return ret; + goto bail_unlock; ret = qual_power(ppd); if (ret) - return ret; + goto bail_unlock; ret = qual_bitrate(ppd); if (ret) - return ret; + goto bail_unlock; if (ppd->qsfp_info.reset_needed) { reset_qsfp(ppd); @@ -629,7 +636,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, ret = set_qsfp_high_power(ppd); if (ret) - return ret; + goto bail_unlock; if (cache[QSFP_EQ_INFO_OFFS] & 0x4) { ret = get_platform_config_field( @@ -639,7 +646,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, ptr_tx_preset, 4); if (ret) { *ptr_tx_preset = OPA_INVALID_INDEX; - 
return ret; + goto bail_unlock; } } else { ret = get_platform_config_field( @@ -649,7 +656,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, ptr_tx_preset, 4); if (ret) { *ptr_tx_preset = OPA_INVALID_INDEX; - return ret; + goto bail_unlock; } } @@ -658,7 +665,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4); if (ret) { *ptr_rx_preset = OPA_INVALID_INDEX; - return ret; + goto bail_unlock; } if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G)) @@ -677,6 +684,9 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, apply_rx_amplitude_settings(ppd, *ptr_rx_preset, *ptr_tx_preset); ret = set_qsfp_tx(ppd, 1); + +bail_unlock: + release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); return ret; } diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c index 7e76b93..9ed1963 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/staging/rdma/hfi1/qsfp.c @@ -59,7 +59,7 @@ #define I2C_MAX_RETRY 4 /* - * Unlocked i2c write. Must hold dd->qsfp_i2c_mutex. + * Raw i2c write. No set-up or lock checking. */ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) @@ -88,15 +88,16 @@ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, return cnt; } +/* + * Caller must hold the i2c chain resource. + */ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) { - struct hfi1_devdata *dd = ppd->dd; int ret; - ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex); - if (ret) - return ret; + if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) + return -EACCES; /* make sure the TWSI bus is in a sane state */ ret = hfi1_twsi_reset(ppd->dd, target); @@ -104,18 +105,14 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, hfi1_dev_porterr(ppd->dd, ppd->port, "I2C chain %d write interface reset failed\n", target); - goto done; + return ret; } - ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len); - -done: - mutex_unlock(&dd->qsfp_i2c_mutex); - return ret; + return __i2c_write(ppd, target, i2c_addr, offset, bp, len); } /* - * Unlocked i2c read. Must hold dd->qsfp_i2c_mutex. + * Raw i2c read. No set-up or lock checking. */ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) @@ -157,15 +154,16 @@ exit: return ret; } +/* + * Caller must hold the i2c chain resource. + */ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) { - struct hfi1_devdata *dd = ppd->dd; int ret; - ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex); - if (ret) - return ret; + if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) + return -EACCES; /* make sure the TWSI bus is in a sane state */ ret = hfi1_twsi_reset(ppd->dd, target); @@ -173,19 +171,17 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, hfi1_dev_porterr(ppd->dd, ppd->port, "I2C chain %d read interface reset failed\n", target); - goto done; + return ret; } - ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len); - -done: - mutex_unlock(&dd->qsfp_i2c_mutex); - return ret; + return __i2c_read(ppd, target, i2c_addr, offset, bp, len); } /* * Write page n, offset m of QSFP memory as defined by SFF 8636 * by writing @addr = ((256 * n) + m) + * + * Caller must hold the i2c chain resource. 
*/ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -196,9 +192,8 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int ret; u8 page; - ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex); - if (ret) - return ret; + if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) + return -EACCES; /* make sure the TWSI bus is in a sane state */ ret = hfi1_twsi_reset(ppd->dd, target); @@ -206,7 +201,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, hfi1_dev_porterr(ppd->dd, ppd->port, "QSFP chain %d write interface reset failed\n", target); - mutex_unlock(&ppd->dd->qsfp_i2c_mutex); return ret; } @@ -242,16 +236,36 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, addr += ret; } - mutex_unlock(&ppd->dd->qsfp_i2c_mutex); - if (ret < 0) return ret; return count; } /* + * Perform a stand-alone single QSFP write. Acquire the resource, do the + * read, then release the resource. + */ +int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, + int len) +{ + struct hfi1_devdata *dd = ppd->dd; + u32 resource = qsfp_resource(dd); + int ret; + + ret = acquire_chip_resource(dd, resource, QSFP_WAIT); + if (ret) + return ret; + ret = qsfp_write(ppd, target, addr, bp, len); + release_chip_resource(dd, resource); + + return ret; +} + +/* * Access page n, offset m of QSFP memory as defined by SFF 8636 * by reading @addr = ((256 * n) + m) + * + * Caller must hold the i2c chain resource. */ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -262,9 +276,8 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int ret; u8 page; - ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex); - if (ret) - return ret; + if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) + return -EACCES; /* make sure the TWSI bus is in a sane state */ ret = hfi1_twsi_reset(ppd->dd, target); @@ -272,7 +285,6 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, hfi1_dev_porterr(ppd->dd, ppd->port, "QSFP chain %d read interface reset failed\n", target); - mutex_unlock(&ppd->dd->qsfp_i2c_mutex); return ret; } @@ -309,14 +321,32 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, addr += ret; } - mutex_unlock(&ppd->dd->qsfp_i2c_mutex); - if (ret < 0) return ret; return count; } /* + * Perform a stand-alone single QSFP read. Acquire the resource, do the + * read, then release the resource. + */ +int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, + int len) +{ + struct hfi1_devdata *dd = ppd->dd; + u32 resource = qsfp_resource(dd); + int ret; + + ret = acquire_chip_resource(dd, resource, QSFP_WAIT); + if (ret) + return ret; + ret = qsfp_read(ppd, target, addr, bp, len); + release_chip_resource(dd, resource); + + return ret; +} + +/* * This function caches the QSFP memory range in 128 byte chunks. * As an example, the next byte after address 255 is byte 128 from * upper page 01H (if existing) rather than byte 0 from lower page 00H. 
@@ -341,9 +371,13 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) if (!qsfp_mod_present(ppd)) { ret = -ENODEV; - goto bail; + goto bail_no_release; } + ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT); + if (ret) + goto bail_no_release; + ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE); if (ret != QSFP_PAGESIZE) { dd_dev_info(ppd->dd, @@ -406,6 +440,8 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) } } + release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); + spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.cache_valid = 1; ppd->qsfp_info.cache_refresh_required = 0; @@ -414,6 +450,8 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) return 0; bail: + release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); +bail_no_release: memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128)); return ret; } diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h index 2ad5980..831fe4c 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/staging/rdma/hfi1/qsfp.h @@ -235,3 +235,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); +int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, + int len); +int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, + int len); -- cgit v0.10.2 From a453698b52dbfb248d23331450c638eaa3219025 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:50:11 -0800 Subject: IB/hfi1: Change thermal init to use resource reservation Use the resource reservation system to flag that the ASIC thermal has been initialized. Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index d3a9b9f..7170913 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -14331,7 +14331,7 @@ static int thermal_init(struct hfi1_devdata *dd) int ret = 0; if (dd->icode != ICODE_RTL_SILICON || - !(dd->flags & HFI1_DO_INIT_ASIC)) + check_chip_resource(dd, CR_THERM_INIT, NULL)) return ret; ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT); @@ -14386,6 +14386,12 @@ static int thermal_init(struct hfi1_devdata *dd) /* Enable polling of thermal readings */ write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1); + + /* Set initialized flag */ + ret = acquire_chip_resource(dd, CR_THERM_INIT, 0); + if (ret) + THERM_FAILURE(dd, ret, "Unable to set thermal init flag"); + done: release_chip_resource(dd, CR_SBUS); return ret; -- cgit v0.10.2 From 7a8f28ca3daa61dc48046b4f1fa73139fb47bbfe Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:50:17 -0800 Subject: IB/hfi1: Remove unused HFI1_DO_INIT_ASIC flag The flag HFI1_DO_INIT_ASIC flag is no longer used. Remove the flag and the code that sets it. Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 7170913..ac03452 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -13815,11 +13815,6 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd) * Information can be shared between the two HFIs on the same ASIC * in the same OS. 
This function finds the peer device and sets * up a shared structure. - * - * Certain chip functions need to be initialized only once per asic - * instead of per-device. This function finds the peer device and - * checks whether that chip initialization needs to be done by this - * device. */ static int init_asic_data(struct hfi1_devdata *dd) { @@ -13837,13 +13832,6 @@ static int init_asic_data(struct hfi1_devdata *dd) } } - /* - * "Claim" the ASIC for initialization if it hasn't been - " "claimed" yet. - */ - if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC)) - dd->flags |= HFI1_DO_INIT_ASIC; - if (peer) { dd->asic_data = peer->asic_data; } else { diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 108015c..035a151 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1602,7 +1602,6 @@ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) #define HFI1_HAS_SDMA_TIMEOUT 0x8 #define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */ #define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */ -#define HFI1_DO_INIT_ASIC 0x100 /* This device will init the ASIC */ /* IB dword length mask in PBC (lower 11 bits); same for all chips */ #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) -- cgit v0.10.2 From b0506f4c56d66f4a8413eaeb57212cf8166e30e9 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:50:22 -0800 Subject: IB/hfi1: Reduce hardware mutex timeout The hardware mutex is now held only long enough to set or clear flags. Reduce the timeout to something more reasonable. Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c index 1ea1ad8..3040162 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/staging/rdma/hfi1/firmware.c @@ -198,7 +198,7 @@ static const struct firmware *platform_config; #define RSA_ENGINE_TIMEOUT 100 /* ms */ /* hardware mutex timeout, in ms */ -#define HM_TIMEOUT 4000 /* 4 s */ +#define HM_TIMEOUT 10 /* ms */ /* 8051 memory access timeout, in us */ #define DC8051_ACCESS_TIMEOUT 100 /* us */ -- cgit v0.10.2 From ae993e7fba05c6159e1af1dc504bade46a94eb47 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:50:27 -0800 Subject: IB/hfi1: Hold i2c resource across debugfs open/close External i2c firmware updates are done in multiple steps and cannot have other things done in between. For debugfs files, acquire the resource on open and release it on close. 
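The shape of the change is the usual debugfs pairing: .open takes a module reference and the i2c chain resource, .release drops both, so every read and write issued while the file is open runs under one reservation; a multi-write firmware update therefore owns the chain from open() to close(). A trimmed sketch of that pairing, with the *_example names as placeholders and the chain hard-coded to i2c1; private2ppd(), i2c_target() and the resource calls are the driver's:

	static int i2c_example_open(struct inode *in, struct file *fp)
	{
		struct hfi1_pportdata *ppd;
		int ret;

		if (!try_module_get(THIS_MODULE))
			return -ENODEV;
		ppd = private2ppd(fp);
		/* zero wait: fail immediately if the chain is already reserved */
		ret = acquire_chip_resource(ppd->dd, i2c_target(0), 0);
		if (ret)
			module_put(THIS_MODULE);
		return ret;
	}

	static int i2c_example_release(struct inode *in, struct file *fp)
	{
		struct hfi1_pportdata *ppd = private2ppd(fp);

		release_chip_resource(ppd->dd, i2c_target(0));
		module_put(THIS_MODULE);
		return 0;
	}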
Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index 665666c..6a1bc28 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "hfi.h" #include "debugfs.h" @@ -465,22 +466,16 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, goto _free; } - ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); - if (ret) - goto _free; - total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count); if (total_written < 0) { ret = total_written; - goto _release; + goto _free; } *ppos += total_written; ret = total_written; - _release: - release_chip_resource(ppd->dd, i2c_target(target)); _free: kfree(buff); _return: @@ -532,14 +527,10 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, goto _return; } - ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); - if (ret) - goto _free; - total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count); if (total_read < 0) { ret = total_read; - goto _release; + goto _free; } *ppos += total_read; @@ -547,13 +538,11 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, ret = copy_to_user(buf, buff, total_read); if (ret > 0) { ret = -EFAULT; - goto _release; + goto _free; } ret = total_read; - _release: - release_chip_resource(ppd->dd, i2c_target(target)); _free: kfree(buff); _return: @@ -604,7 +593,7 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, goto _free; } - total_written = one_qsfp_write(ppd, target, *ppos, buff, count); + total_written = qsfp_write(ppd, target, *ppos, buff, count); if (total_written < 0) { ret = total_written; goto _free; @@ -658,7 +647,7 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, goto _return; } - total_read = one_qsfp_read(ppd, target, *ppos, buff, count); + total_read = qsfp_read(ppd, target, *ppos, buff, count); if (total_read < 0) { ret = total_read; goto _free; @@ -695,6 +684,104 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf, return __qsfp_debugfs_read(file, buf, count, ppos, 1); } +static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target) +{ + struct hfi1_pportdata *ppd; + int ret; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + ppd = private2ppd(fp); + + ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); + if (ret) /* failed - release the module */ + module_put(THIS_MODULE); + + return ret; +} + +static int i2c1_debugfs_open(struct inode *in, struct file *fp) +{ + return __i2c_debugfs_open(in, fp, 0); +} + +static int i2c2_debugfs_open(struct inode *in, struct file *fp) +{ + return __i2c_debugfs_open(in, fp, 1); +} + +static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target) +{ + struct hfi1_pportdata *ppd; + + ppd = private2ppd(fp); + + release_chip_resource(ppd->dd, i2c_target(target)); + module_put(THIS_MODULE); + + return 0; +} + +static int i2c1_debugfs_release(struct inode *in, struct file *fp) +{ + return __i2c_debugfs_release(in, fp, 0); +} + +static int i2c2_debugfs_release(struct inode *in, struct file *fp) +{ + return __i2c_debugfs_release(in, fp, 1); +} + +static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target) +{ + struct hfi1_pportdata *ppd; + int ret; + + if 
(!try_module_get(THIS_MODULE)) + return -ENODEV; + + ppd = private2ppd(fp); + + ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); + if (ret) /* failed - release the module */ + module_put(THIS_MODULE); + + return ret; +} + +static int qsfp1_debugfs_open(struct inode *in, struct file *fp) +{ + return __qsfp_debugfs_open(in, fp, 0); +} + +static int qsfp2_debugfs_open(struct inode *in, struct file *fp) +{ + return __qsfp_debugfs_open(in, fp, 1); +} + +static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target) +{ + struct hfi1_pportdata *ppd; + + ppd = private2ppd(fp); + + release_chip_resource(ppd->dd, i2c_target(target)); + module_put(THIS_MODULE); + + return 0; +} + +static int qsfp1_debugfs_release(struct inode *in, struct file *fp) +{ + return __qsfp_debugfs_release(in, fp, 0); +} + +static int qsfp2_debugfs_release(struct inode *in, struct file *fp) +{ + return __qsfp_debugfs_release(in, fp, 1); +} + #define DEBUGFS_OPS(nm, readroutine, writeroutine) \ { \ .name = nm, \ @@ -705,6 +792,18 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf, }, \ } +#define DEBUGFS_XOPS(nm, readf, writef, openf, releasef) \ +{ \ + .name = nm, \ + .ops = { \ + .read = readf, \ + .write = writef, \ + .llseek = generic_file_llseek, \ + .open = openf, \ + .release = releasef \ + }, \ +} + static const struct counter_info cntr_ops[] = { DEBUGFS_OPS("counter_names", dev_names_read, NULL), DEBUGFS_OPS("counters", dev_counters_read, NULL), @@ -713,11 +812,15 @@ static const struct counter_info cntr_ops[] = { static const struct counter_info port_cntr_ops[] = { DEBUGFS_OPS("port%dcounters", portcntrs_debugfs_read, NULL), - DEBUGFS_OPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write), - DEBUGFS_OPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write), + DEBUGFS_XOPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write, + i2c1_debugfs_open, i2c1_debugfs_release), + DEBUGFS_XOPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write, + i2c2_debugfs_open, i2c2_debugfs_release), DEBUGFS_OPS("qsfp_dump%d", qsfp_debugfs_dump, NULL), - DEBUGFS_OPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write), - DEBUGFS_OPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write), + DEBUGFS_XOPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write, + qsfp1_debugfs_open, qsfp1_debugfs_release), + DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, + qsfp2_debugfs_open, qsfp2_debugfs_release), }; void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) -- cgit v0.10.2 From c9c8ea3d47ebe025c3bca692e729f4c2e634c9a8 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:50:33 -0800 Subject: IB/hfi1: Add ASIC flag view/clear Different OSes using parts of the same hardware may leave cross-device flags set. Export a debugfs file to view and clear these flags if needed. Reviewed-by: Mitko Haralanov Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c index 6a1bc28..dbab9d9 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/staging/rdma/hfi1/debugfs.c @@ -404,6 +404,130 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf, return rval; } +static void check_dyn_flag(u64 scratch0, char *p, int size, int *used, + int this_hfi, int hfi, u32 flag, const char *what) +{ + u32 mask; + + mask = flag << (hfi ? 
CR_DYN_SHIFT : 0); + if (scratch0 & mask) { + *used += scnprintf(p + *used, size - *used, + " 0x%08x - HFI%d %s in use, %s device\n", + mask, hfi, what, + this_hfi == hfi ? "this" : "other"); + } +} + +static ssize_t asic_flags_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd; + struct hfi1_devdata *dd; + u64 scratch0; + char *tmp; + int ret = 0; + int size; + int used; + int i; + + rcu_read_lock(); + ppd = private2ppd(file); + dd = ppd->dd; + size = PAGE_SIZE; + used = 0; + tmp = kmalloc(size, GFP_KERNEL); + if (!tmp) { + rcu_read_unlock(); + return -ENOMEM; + } + + scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); + used += scnprintf(tmp + used, size - used, + "Resource flags: 0x%016llx\n", scratch0); + + /* check permanent flag */ + if (scratch0 & CR_THERM_INIT) { + used += scnprintf(tmp + used, size - used, + " 0x%08x - thermal monitoring initialized\n", + (u32)CR_THERM_INIT); + } + + /* check each dynamic flag on each HFI */ + for (i = 0; i < 2; i++) { + check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i, + CR_SBUS, "SBus"); + check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i, + CR_EPROM, "EPROM"); + check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i, + CR_I2C1, "i2c chain 1"); + check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i, + CR_I2C2, "i2c chain 2"); + } + used += scnprintf(tmp + used, size - used, "Write bits to clear\n"); + + ret = simple_read_from_buffer(buf, count, ppos, tmp, used); + rcu_read_unlock(); + kfree(tmp); + return ret; +} + +static ssize_t asic_flags_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd; + struct hfi1_devdata *dd; + char *buff; + int ret; + unsigned long long value; + u64 scratch0; + u64 clear; + + rcu_read_lock(); + ppd = private2ppd(file); + dd = ppd->dd; + + buff = kmalloc(count + 1, GFP_KERNEL); + if (!buff) { + ret = -ENOMEM; + goto do_return; + } + + ret = copy_from_user(buff, buf, count); + if (ret > 0) { + ret = -EFAULT; + goto do_free; + } + + /* zero terminate and read the expected integer */ + buff[count] = 0; + ret = kstrtoull(buff, 0, &value); + if (ret) + goto do_free; + clear = value; + + /* obtain exclusive access */ + mutex_lock(&dd->asic_data->asic_resource_mutex); + acquire_hw_mutex(dd); + + scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); + scratch0 &= ~clear; + write_csr(dd, ASIC_CFG_SCRATCH, scratch0); + /* force write to be visible to other HFI on another OS */ + (void)read_csr(dd, ASIC_CFG_SCRATCH); + + release_hw_mutex(dd); + mutex_unlock(&dd->asic_data->asic_resource_mutex); + + /* return the number of bytes written */ + ret = count; + + do_free: + kfree(buff); + do_return: + rcu_read_unlock(); + return ret; +} + /* * read the per-port QSFP data for ppd */ @@ -821,6 +945,7 @@ static const struct counter_info port_cntr_ops[] = { qsfp1_debugfs_open, qsfp1_debugfs_release), DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), + DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), }; void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) -- cgit v0.10.2 From 8fefef125ed4b9347068d782aa5439f3da3dca32 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Sat, 5 Mar 2016 08:50:38 -0800 Subject: IB/hfi1: Handle host handshake timeout Host handshake timeout can occur during the verify capability state. This is a LNI related failure and should be handled in the same way as other LNI failures. 
Reviewed-by: Dean Luick Reviewed-by: Easwar Hariharan Reviewed-by: Mike Marciniszyn Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index ac03452..c29860c 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -963,7 +963,8 @@ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Failed LNI(OptEq)", FAILED_LNI_OPTEQ), FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1), FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2), - FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT) + FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT), + FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT) }; /* diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index 9313963..4f3b878 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -253,12 +253,13 @@ #define FAILED_LNI_VERIFY_CAP1 BIT(9) #define FAILED_LNI_VERIFY_CAP2 BIT(10) #define FAILED_LNI_CONFIGLT BIT(11) +#define HOST_HANDSHAKE_TIMEOUT BIT(12) #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \ | FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \ | FAILED_LNI_VERIFY_CAP1 \ | FAILED_LNI_VERIFY_CAP2 \ - | FAILED_LNI_CONFIGLT) + | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT) /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */ #define HOST_REQ_DONE BIT(0) -- cgit v0.10.2 From 528ee9fbf0244406a76cb5e37406eef303b09a46 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Sat, 5 Mar 2016 08:50:43 -0800 Subject: IB/hfi1: Add adaptive cacheless verbs copy The kernel memcpy is faster than a cacheless copy. However, if too much of the L3 cache is overwritten by one-time copies then overall bandwidth suffers. Implement an adaptive scheme where full page copies are tracked and if the number of unique entries are larger than a threshold, verbs will use a cacheless copy. Tracked entries are gradually cleaned, allowing memcpy to resume once the larger copies have stopped. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index f21933c..deabb08 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1242,6 +1242,9 @@ static int __init hfi1_mod_init(void) idr_init(&hfi1_unit_table); hfi1_dbg_init(); + ret = hfi1_wss_init(); + if (ret < 0) + goto bail_wss; ret = pci_register_driver(&hfi1_pci_driver); if (ret < 0) { pr_err("Unable to register driver: error %d\n", -ret); @@ -1250,6 +1253,8 @@ static int __init hfi1_mod_init(void) goto bail; /* all OK */ bail_dev: + hfi1_wss_exit(); +bail_wss: hfi1_dbg_exit(); idr_destroy(&hfi1_unit_table); dev_cleanup(); @@ -1265,6 +1270,7 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); + hfi1_wss_exit(); hfi1_dbg_exit(); hfi1_cpulist_count = 0; kfree(hfi1_cpulist); diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 220bdb0..8209757 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -125,6 +125,13 @@ unsigned short piothreshold; module_param(piothreshold, ushort, S_IRUGO); MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. 
pio"); +#define COPY_CACHELESS 1 +#define COPY_ADAPTIVE 2 +static unsigned int sge_copy_mode; +module_param(sge_copy_mode, uint, S_IRUGO); +MODULE_PARM_DESC(sge_copy_mode, + "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS"); + static void verbs_sdma_complete( struct sdma_txreq *cookie, int status); @@ -137,6 +144,159 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 +static uint wss_threshold; +module_param(wss_threshold, uint, S_IRUGO); +MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); +static uint wss_clean_period = 256; +module_param(wss_clean_period, uint, S_IRUGO); +MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned"); + +/* memory working set size */ +struct hfi1_wss { + unsigned long *entries; + atomic_t total_count; + atomic_t clean_counter; + atomic_t clean_entry; + + int threshold; + int num_entries; + long pages_mask; +}; + +static struct hfi1_wss wss; + +int hfi1_wss_init(void) +{ + long llc_size; + long llc_bits; + long table_size; + long table_bits; + + /* check for a valid percent range - default to 80 if none or invalid */ + if (wss_threshold < 1 || wss_threshold > 100) + wss_threshold = 80; + /* reject a wildly large period */ + if (wss_clean_period > 1000000) + wss_clean_period = 256; + /* reject a zero period */ + if (wss_clean_period == 0) + wss_clean_period = 1; + + /* + * Calculate the table size - the next power of 2 larger than the + * LLC size. LLC size is in KiB. + */ + llc_size = wss_llc_size() * 1024; + table_size = roundup_pow_of_two(llc_size); + + /* one bit per page in rounded up table */ + llc_bits = llc_size / PAGE_SIZE; + table_bits = table_size / PAGE_SIZE; + wss.pages_mask = table_bits - 1; + wss.num_entries = table_bits / BITS_PER_LONG; + + wss.threshold = (llc_bits * wss_threshold) / 100; + if (wss.threshold == 0) + wss.threshold = 1; + + atomic_set(&wss.clean_counter, wss_clean_period); + + wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries), + GFP_KERNEL); + if (!wss.entries) { + hfi1_wss_exit(); + return -ENOMEM; + } + + return 0; +} + +void hfi1_wss_exit(void) +{ + /* coded to handle partially initialized and repeat callers */ + kfree(wss.entries); + wss.entries = NULL; +} + +/* + * Advance the clean counter. When the clean period has expired, + * clean an entry. + * + * This is implemented in atomics to avoid locking. Because multiple + * variables are involved, it can be racy which can lead to slightly + * inaccurate information. Since this is only a heuristic, this is + * OK. Any innaccuracies will clean themselves out as the counter + * advances. That said, it is unlikely the entry clean operation will + * race - the next possible racer will not start until the next clean + * period. + * + * The clean counter is implemented as a decrement to zero. When zero + * is reached an entry is cleaned. + */ +static void wss_advance_clean_counter(void) +{ + int entry; + int weight; + unsigned long bits; + + /* become the cleaner if we decrement the counter to zero */ + if (atomic_dec_and_test(&wss.clean_counter)) { + /* + * Set, not add, the clean period. This avoids an issue + * where the counter could decrement below the clean period. + * Doing a set can result in lost decrements, slowing the + * clean advance. Since this a heuristic, this possible + * slowdown is OK. 
+ * + * An alternative is to loop, advancing the counter by a + * clean period until the result is > 0. However, this could + * lead to several threads keeping another in the clean loop. + * This could be mitigated by limiting the number of times + * we stay in the loop. + */ + atomic_set(&wss.clean_counter, wss_clean_period); + + /* + * Uniquely grab the entry to clean and move to next. + * The current entry is always the lower bits of + * wss.clean_entry. The table size, wss.num_entries, + * is always a power-of-2. + */ + entry = (atomic_inc_return(&wss.clean_entry) - 1) + & (wss.num_entries - 1); + + /* clear the entry and count the bits */ + bits = xchg(&wss.entries[entry], 0); + weight = hweight64((u64)bits); + /* only adjust the contended total count if needed */ + if (weight) + atomic_sub(weight, &wss.total_count); + } +} + +/* + * Insert the given address into the working set array. + */ +static void wss_insert(void *address) +{ + u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask; + u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */ + u32 nr = page & (BITS_PER_LONG - 1); + + if (!test_and_set_bit(nr, &wss.entries[entry])) + atomic_inc(&wss.total_count); + + wss_advance_clean_counter(); +} + +/* + * Is the working set larger than the threshold? + */ +static inline int wss_exceeds_threshold(void) +{ + return atomic_read(&wss.total_count) >= wss.threshold; +} + /* * Translate ib_wr_opcode into ib_wc_opcode. */ @@ -258,7 +418,26 @@ void hfi1_copy_sge( struct rvt_sge *sge = &ss->sge; int in_last = 0; int i; + int cacheless_copy = 0; + if (sge_copy_mode == COPY_CACHELESS) { + cacheless_copy = length >= PAGE_SIZE; + } else if (sge_copy_mode == COPY_ADAPTIVE) { + if (length >= PAGE_SIZE) { + /* + * NOTE: this *assumes*: + * o The first vaddr is the dest. + * o If multiple pages, then vaddr is sequential. + */ + wss_insert(sge->vaddr); + if (length >= (2 * PAGE_SIZE)) + wss_insert(sge->vaddr + PAGE_SIZE); + + cacheless_copy = wss_exceeds_threshold(); + } else { + wss_advance_clean_counter(); + } + } if (copy_last) { if (length > 8) { length -= 8; @@ -277,10 +456,12 @@ again: if (len > sge->sge_length) len = sge->sge_length; WARN_ON_ONCE(len == 0); - if (in_last) { - /* enforce byte transer ordering */ + if (unlikely(in_last)) { + /* enforce byte transfer ordering */ for (i = 0; i < len; i++) ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i]; + } else if (cacheless_copy) { + cacheless_memcpy(sge->vaddr, data, len); } else { memcpy(sge->vaddr, data, len); } diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index a85e6bc..6c4670f 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -475,6 +475,28 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc); +int hfi1_wss_init(void); +void hfi1_wss_exit(void); + +/* platform specific: return the lowest level cache (llc) size, in KiB */ +static inline int wss_llc_size(void) +{ + /* assume that the boot CPU value is universal for all CPUs */ + return boot_cpu_data.x86_cache_size; +} + +/* platform specific: cacheless copy */ +static inline void cacheless_memcpy(void *dst, void *src, size_t n) +{ + /* + * Use the only available X64 cacheless copy. Add a __user cast + * to quiet sparse. The src agument is already in the kernel so + * there are no security issues. The extra fault recovery machinery + * is not invoked. 
+ */ + __copy_user_nocache(dst, (void __user *)src, n, 0); +} + extern const enum ib_wc_opcode ib_hfi1_wc_opcode[]; extern const u8 hdr_len_by_opcode[]; -- cgit v0.10.2 From 831464ce4b74eaec723bad51ea48fe3879732f66 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sat, 5 Mar 2016 08:50:49 -0800 Subject: IB/hfi1: Don't call cond_resched in atomic mode when sending packets This patch fixed the problem where the driver might reschedule in atomic mode when sending packets. This is due to the fact that the call to cond_resched() in hfi1_do_send() might occur in atomic mode and a check is required to avoid the warning message: "kernel: BUG: scheduling while atomic: swapper/2/0/0x10000100." Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index aa53859..08813cd 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -906,8 +906,11 @@ void hfi1_do_send(struct rvt_qp *qp) *ps.ppd->dd->send_schedule); return; } - cond_resched(); - this_cpu_inc(*ps.ppd->dd->send_schedule); + if (!irqs_disabled()) { + cond_resched(); + this_cpu_inc( + *ps.ppd->dd->send_schedule); + } timeout = jiffies + (timeout_int) / 8; } spin_lock_irqsave(&qp->s_lock, flags); -- cgit v0.10.2 From 2243472e9d98c3ca0cb735f96ad48a7b59bdb34d Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 7 Mar 2016 11:35:03 -0800 Subject: IB/hfi1: Improve LED beaconing The current LED beaconing code is unclear and uses the timer handler to turn off the timer. This patch simplifies the code by removing the special semantics of timeon = timeoff = 0 being interpreted as a request to turn off the beaconing. Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Signed-off-by: Easwar Hariharan Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index 4581864..914beed 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -1170,18 +1170,20 @@ void shutdown_led_override(struct hfi1_pportdata *ppd) struct hfi1_devdata *dd = ppd->dd; /* - * This pairs with the memory barrier implied by the atomic_dec in - * hfi1_set_led_override to ensure that we read the correct state of - * LED beaconing represented by led_override_timer_active + * This pairs with the memory barrier in hfi1_start_led_override to + * ensure that we read the correct state of LED beaconing represented + * by led_override_timer_active */ - smp_mb(); + smp_rmb(); if (atomic_read(&ppd->led_override_timer_active)) { del_timer_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); + /* Ensure the atomic_set is visible to all CPUs */ + smp_wmb(); } - /* Shut off LEDs after we are sure timer is not running */ - setextled(dd, 0); + /* Hand control of the LED to the DC for normal operation */ + write_csr(dd, DCC_CFG_LED_CNTRL, 0); } static void run_led_override(unsigned long opaque) @@ -1195,59 +1197,48 @@ static void run_led_override(unsigned long opaque) return; phase_idx = ppd->led_override_phase & 1; + setextled(dd, phase_idx); timeout = ppd->led_override_vals[phase_idx]; + /* Set up for next phase */ ppd->led_override_phase = !ppd->led_override_phase; - /* - * don't re-fire the timer if user asked for it to be off; we let - * it fire one more time after they turn it off to simplify - */ - if (ppd->led_override_vals[0] || 
ppd->led_override_vals[1]) { - mod_timer(&ppd->led_override_timer, jiffies + timeout); - } else { - /* Hand control of the LED to the DC for normal operation */ - write_csr(dd, DCC_CFG_LED_CNTRL, 0); - /* Record that we did not re-fire the timer */ - atomic_dec(&ppd->led_override_timer_active); - } + mod_timer(&ppd->led_override_timer, jiffies + timeout); } /* * To have the LED blink in a particular pattern, provide timeon and timeoff - * in milliseconds. To turn off custom blinking and return to normal operation, - * provide timeon = timeoff = 0. + * in milliseconds. + * To turn off custom blinking and return to normal operation, use + * shutdown_led_override() */ -void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, - unsigned int timeoff) +void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, + unsigned int timeoff) { - struct hfi1_devdata *dd = ppd->dd; - - if (!(dd->flags & HFI1_INITTED)) + if (!(ppd->dd->flags & HFI1_INITTED)) return; /* Convert to jiffies for direct use in timer */ ppd->led_override_vals[0] = msecs_to_jiffies(timeoff); ppd->led_override_vals[1] = msecs_to_jiffies(timeon); - ppd->led_override_phase = 1; /* Arbitrarily start from LED on phase */ + + /* Arbitrarily start from LED on phase */ + ppd->led_override_phase = 1; /* * If the timer has not already been started, do so. Use a "quick" - * timeout so the function will be called soon, to look at our request. + * timeout so the handler will be called soon to look at our request. */ - if (atomic_inc_return(&ppd->led_override_timer_active) == 1) { - /* Need to start timer */ + if (!timer_pending(&ppd->led_override_timer)) { setup_timer(&ppd->led_override_timer, run_led_override, (unsigned long)ppd); - ppd->led_override_timer.expires = jiffies + 1; add_timer(&ppd->led_override_timer); - } else { - if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) - mod_timer(&ppd->led_override_timer, jiffies + 1); - atomic_dec(&ppd->led_override_timer_active); + atomic_set(&ppd->led_override_timer_active, 1); + /* Ensure the atomic_set is visible to all CPUs */ + smp_wmb(); } } diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 035a151..5722883 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1623,13 +1623,9 @@ void hfi1_free_devdata(struct hfi1_devdata *); void cc_state_reclaim(struct rcu_head *rcu); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); -void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, - unsigned int timeoff); -/* - * Only to be used for driver unload or device reset where we cannot allow - * the timer to fire even the one extra time, else use hfi1_set_led_override - * with timeon = timeoff = 0 - */ +/* LED beaconing functions */ +void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, + unsigned int timeoff); void shutdown_led_override(struct hfi1_pportdata *ppd); #define HFI1_CREDIT_RETURN_RATE (100) diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 5925798..0ec748e 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/staging/rdma/hfi1/mad.c @@ -583,11 +583,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->port_states.ledenable_offlinereason |= ppd->is_sm_config_started << 5; /* - * This pairs with the memory barrier implied by the atomic_dec in - * hfi1_set_led_override to ensure that we read the correct state of - * LED beaconing represented by 
led_override_timer_active + * This pairs with the memory barrier in hfi1_start_led_override to + * ensure that we read the correct state of LED beaconing represented + * by led_override_timer_active */ - smp_mb(); + smp_rmb(); is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active); pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6; pi->port_states.ledenable_offlinereason |= @@ -3598,11 +3598,11 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, } /* - * This pairs with the memory barrier implied by the atomic_dec in - * hfi1_set_led_override to ensure that we read the correct state of - * LED beaconing represented by led_override_timer_active + * This pairs with the memory barrier in hfi1_start_led_override to + * ensure that we read the correct state of LED beaconing represented + * by led_override_timer_active */ - smp_mb(); + smp_rmb(); is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active); p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT); @@ -3627,9 +3627,9 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, } if (on) - hfi1_set_led_override(dd->pport, 2000, 1500); + hfi1_start_led_override(dd->pport, 2000, 1500); else - hfi1_set_led_override(dd->pport, 0, 0); + shutdown_led_override(dd->pport); return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len); } -- cgit v0.10.2 From ef086c0d5dd9a151578c72b6f257e5b0e77d65eb Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:08 -0800 Subject: IB/hfi1: Report pid in qp_stats to aid debug Tracking user/QP ownership is needed to debug issues with user ULPs like OpenMPI. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ef82abf..de34474 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -786,6 +786,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_ip; } } + qp->pid = current->pid; } spin_lock(&rdi->n_qps_lock); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 9e831a1..6f85715 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -678,7 +678,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u CQ %u %u\n", + "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u CQ %u %u PID %d\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -712,7 +712,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) send_context, send_context ? 
send_context->sw_index : 0, ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, - ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail); + ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, + qp->pid); } void qp_comm_est(struct rvt_qp *qp) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f2f4df0..497e590 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -251,6 +251,7 @@ struct rvt_qp { enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ + pid_t pid; /* pid for user mode QPs */ u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ -- cgit v0.10.2 From ef6d8c4ec86f03b1e40791a804c746e5efacaf86 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:14 -0800 Subject: IB/hfi1: Fix issues with qp_stats print The changes are to aid in correlating trace information with QPs between the trace and qp_stats information. Such changes include adding a space after QP and clarifying that the second QP is actually the remote QP. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 6f85715..59ee12a 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -678,7 +678,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u CQ %u %u PID %d\n", + "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, -- cgit v0.10.2 From 1db78eeebee7cde877194ddc8691f192e6279609 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:19 -0800 Subject: IB/hfi1: Add unique trace point for pio and sdma send This allows for separately enabling pio and sdma tracepoints to cut the volume of trace information.
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 351f136..0d7e101 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -881,7 +881,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, goto queue_ack; } - trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr); + trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr); /* write the pbc and data */ ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index b8b4416..4d91c18 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -530,7 +530,15 @@ DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), TP_ARGS(dd, hdr)); -DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr, +DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), TP_ARGS(dd, hdr)); diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 8209757..e605e09 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -891,8 +891,8 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (unlikely(ret)) goto bail_build; } - trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), + &ps->s_txreq->phdr.hdr); ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); if (unlikely(ret == -ECOMM)) goto bail_ecomm; @@ -1067,8 +1067,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } } - trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), + &ps->s_txreq->phdr.hdr); pio_bail: if (qp->s_wqe) { -- cgit v0.10.2 From 5326dfbf005ca8589d709209a81d145c5b87b23d Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:24 -0800 Subject: IB/hfi1: Fix ordering of trace for accuracy The positioning of the sdma ibhdr trace was causing an extra trace message when the tx send returned -EBUSY. Move the trace to just before the return and handle negative return values to avoid any trace.
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index e605e09..467e6c34 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -891,11 +891,14 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (unlikely(ret)) goto bail_build; } + ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); + if (unlikely(ret < 0)) { + if (ret == -ECOMM) + goto bail_ecomm; + return ret; + } trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ps->s_txreq->phdr.hdr); - ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); - if (unlikely(ret == -ECOMM)) - goto bail_ecomm; return ret; bail_ecomm: -- cgit v0.10.2 From 60df29581f67e06791a176641c774515ec1634e5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:30 -0800 Subject: IB/hfi1: Fix PIO wakeup timing hole There is a timing hole if there had been greater than PIO_WAIT_BATCH_SIZE waiters. This code will dispatch the first batch but leave the others in the queue. If the restarted waiters don't in turn wait on a buffer, there is a hang. Fix by forcing a return when the QP queue is non-empty. Reviewed-by: Vennila Megavannan Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index e888e21..c6849ce 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -1545,7 +1545,7 @@ static void sc_piobufavail(struct send_context *sc) struct iowait *wait; if (n == ARRAY_SIZE(qps)) - goto full; + break; wait = list_first_entry(list, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; @@ -1554,12 +1554,14 @@ static void sc_piobufavail(struct send_context *sc) qps[n++] = qp; } /* - * Counting: only call wantpiobuf_intr() if there were waiters and they - * are now all gone. + * If there had been waiters and there are more + * insure that we redo the force to avoid a potential hang. 
*/ - if (n) + if (n) { hfi1_sc_wantpiobuf_intr(sc, 0); -full: + if (!list_empty(list)) + hfi1_sc_wantpiobuf_intr(sc, 1); + } write_sequnlock_irqrestore(&dev->iowait_lock, flags); for (i = 0; i < n; i++) -- cgit v0.10.2 From cef504c5c019ea4f59cf3a69e7341b2b34091cda Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:35 -0800 Subject: IB/hfi1: Fix panic in adaptive pio The following panic occurs while running ib_send_bw -a with adaptive pio turned on: [ 8551.143596] BUG: unable to handle kernel NULL pointer dereference at (null) [ 8551.152986] IP: [] pio_wait.isra.21+0x34/0x190 [hfi1] [ 8551.160926] PGD 80db21067 PUD 80bb45067 PMD 0 [ 8551.166431] Oops: 0000 [#1] SMP [ 8551.276725] task: ffff880816bf15c0 ti: ffff880812ac0000 task.ti: ffff880812ac0000 [ 8551.285705] RIP: 0010:[] pio_wait.isra.21+0x34/0x190 [hfi1] [ 8551.296462] RSP: 0018:ffff880812ac3b58 EFLAGS: 00010282 [ 8551.303029] RAX: 000000000000002d RBX: 0000000000000000 RCX: 0000000000000800 [ 8551.311633] RDX: ffff880812ac3c08 RSI: 0000000000000000 RDI: ffff8800b6665e40 [ 8551.320228] RBP: ffff880812ac3ba0 R08: 0000000000001000 R09: ffffffffa09039a0 [ 8551.328820] R10: ffff880817a0c000 R11: 0000000000000000 R12: ffff8800b6665e40 [ 8551.337406] R13: ffff880817a0c000 R14: ffff8800b6665800 R15: ffff8800b6665e40 [ 8551.355640] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8551.362674] CR2: 0000000000000000 CR3: 000000080abe8000 CR4: 00000000001406e0 [ 8551.371262] Stack: [ 8551.374119] ffff880812ac3bf0 ffff88080cf54010 ffff880800000800 ffff880812ac3c08 [ 8551.383036] ffff8800b6665800 ffff8800b6665e40 0000000000000202 ffffffffa08e7b80 [ 8551.391941] 00000001007de431 ffff880812ac3bc8 ffffffffa0904645 ffff8800b6665800 [ 8551.400859] Call Trace: [ 8551.404214] [] ? hfi1_del_timers_sync+0x30/0x30 [hfi1] [ 8551.412417] [] hfi1_verbs_send+0x215/0x330 [hfi1] [ 8551.420154] [] hfi1_do_send+0x166/0x350 [hfi1] [ 8551.427618] [] rvt_post_send+0x533/0x6a0 [rdmavt] [ 8551.435367] [] ib_uverbs_post_send+0x30f/0x530 [ib_uverbs] [ 8551.443999] [] ib_uverbs_write+0x117/0x380 [ib_uverbs] [ 8551.452269] [] ? sock_recvmsg+0x3b/0x50 [ 8551.459071] [] ? sock_read_iter+0x92/0xe0 [ 8551.466068] [] __vfs_write+0x37/0x100 [ 8551.472692] [] ? rw_verify_area+0x52/0xd0 [ 8551.479682] [] vfs_write+0xa2/0x1a0 [ 8551.486089] [] ? do_audit_syscall_entry+0x66/0x70 [ 8551.493891] [] SyS_write+0x55/0xc0 [ 8551.500220] [] entry_SYSCALL_64_fastpath+0x12/0x71 [ 8551.531284] RIP [] pio_wait.isra.21+0x34/0x190 [hfi1] [ 8551.539508] RSP [ 8551.544110] CR2: 0000000000000000 The priv s_sendcontext pointer was not setup properly. Fix with this patch by using the s_sendcontext and eliminating its send engine use. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 59ee12a..29a5ad2 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -220,6 +220,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_AV) { priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); + priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); } if (attr_mask & IB_QP_PATH_MIG_STATE && @@ -228,6 +229,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, qp->s_flags |= RVT_S_AHG_CLEAR; priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); + priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); } } diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 467e6c34..7acaa25 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1005,12 +1005,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /* vl15 special case taken care of in ud.c */ sc5 = priv->s_sc; - sc = qp_to_send_context(qp, sc5); + sc = ps->s_txreq->psc; - if (!sc) { - ret = -EINVAL; - goto bail; - } if (likely(pbc == 0)) { u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ -- cgit v0.10.2 From 47177f1bac9ca2b65eefdc9b0b63d0505bd4e11e Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:41 -0800 Subject: IB/hfi1: Fix adaptive pio packet corruption The adaptive pio heuristic missed a case that causes a corrupted packet on the wire. The case is: if SDMA egress had been chosen for a pio-able packet and it then encountered a ring space wait, the packet is queued. The sge cursor had been incremented as part of the packet build out for SDMA. After the send engine restart, the heuristic might now choose pio based on the sdma count being zero and start the mmio copy using the already incremented sge cursor. Fix this by forcing SDMA egress when the SDMA descriptor has already been built. Additionally, the code to wait for a QP's pio count to zero when switching to SDMA was missing. Add it. There is also an issue with UD QPs, in that the different SLs can pick a different egress send context. For now, just ensure the UD/GSI always go through SDMA.
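For orientation, a minimal stand-alone sketch of the corrected eligibility test follows. struct pkt_state and choose_pio() are illustrative stand-ins, not driver code; the fields only mirror the quantities the driver consults (piothreshold, s_cur_size, pmtu, pending SDMA work, and whether an SDMA descriptor was already built).

#include <stdbool.h>
#include <stdio.h>

struct pkt_state {
	unsigned int s_cur_size;    /* payload size of this packet */
	unsigned int pmtu;          /* path MTU of the QP */
	unsigned int sdma_pending;  /* SDMA work still in flight for the QP */
	bool sdma_txreq_built;      /* an SDMA descriptor was already built */
};

static bool choose_pio(const struct pkt_state *p, unsigned int piothreshold)
{
	unsigned int limit = piothreshold < p->pmtu ? piothreshold : p->pmtu;

	/* small packet, no SDMA in flight, and no descriptor already built */
	return piothreshold && p->s_cur_size <= limit &&
	       p->sdma_pending == 0 && !p->sdma_txreq_built;
}

int main(void)
{
	/*
	 * A queued-then-restarted packet: small, but its SDMA descriptor
	 * (and sge cursor) has already advanced, so PIO must not be chosen.
	 */
	struct pkt_state restarted = {
		.s_cur_size = 128,
		.pmtu = 4096,
		.sdma_pending = 0,
		.sdma_txreq_built = true,
	};

	printf("use pio: %d\n", choose_pio(&restarted, 256)); /* prints 0 */
	return 0;
}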
Reviewed-by: Vennila Megavannan Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 7acaa25..62755af 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1179,10 +1179,11 @@ bad: * and size */ static inline send_routine get_send_routine(struct rvt_qp *qp, - struct hfi1_ib_header *h) + struct verbs_txreq *tx) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_ib_header *h = &tx->phdr.hdr; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1191,21 +1192,21 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, return dd->process_pio_send; case IB_QPT_GSI: case IB_QPT_UD: - if (piothreshold && qp->s_cur_size <= piothreshold) - return dd->process_pio_send; break; case IB_QPT_RC: if (piothreshold && qp->s_cur_size <= min(piothreshold, qp->pmtu) && (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) && - iowait_sdma_pending(&priv->s_iowait) == 0) + iowait_sdma_pending(&priv->s_iowait) == 0 && + !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; case IB_QPT_UC: if (piothreshold && qp->s_cur_size <= min(piothreshold, qp->pmtu) && (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) && - iowait_sdma_pending(&priv->s_iowait) == 0) + iowait_sdma_pending(&priv->s_iowait) == 0 && + !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; default: @@ -1225,10 +1226,11 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_qp_priv *priv = qp->priv; send_routine sr; int ret; - sr = get_send_routine(qp, &ps->s_txreq->phdr.hdr); + sr = get_send_routine(qp, ps->s_txreq); ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp); if (unlikely(ret)) { /* @@ -1250,6 +1252,11 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } return -EINVAL; } + if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait)) + return pio_wait(qp, + ps->s_txreq->psc, + ps, + RVT_S_WAIT_PIO_DRAIN); return sr(qp, ps, 0); } -- cgit v0.10.2 From d0e859c32801f6793790d71dc41a9330da0da371 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:46 -0800 Subject: IB/hfi1: Enable adaptive pio by default Set the piothreshold to the agreed upon default of 256B. Reviewed-by: Jubin John Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 62755af..89f2aad 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -121,7 +121,7 @@ unsigned int hfi1_max_srq_wrs = 0x1FFFF; module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO); MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); -unsigned short piothreshold; +unsigned short piothreshold = 256; module_param(piothreshold, ushort, S_IRUGO); MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio"); -- cgit v0.10.2 From 000a830efd370bf93083c7af484ffd84ab7fb21f Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Mon, 7 Mar 2016 11:35:51 -0800 Subject: IB/rdmavt: Post receive for QP in ERR state According to the IB Spec, a WR posted to the receive queue must complete with an error if the QP is in the Error state.
Please refer to C10-42, C10-97.2.1. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Alex Estrin Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index de34474..bd82a69 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1364,6 +1364,8 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_rwq *wq = qp->r_rq.wq; unsigned long flags; + int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) && + !qp->ibqp.srq; /* Check that state is OK to post receive. */ if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { @@ -1390,15 +1392,28 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, *bad_wr = wr; return -ENOMEM; } - - wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; + if (unlikely(qp_err_flush)) { + struct ib_wc wc; + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + wc.wr_id = wr->wr_id; + wc.status = IB_WC_WR_FLUSH_ERR; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + } else { + wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* + * Make sure queue entry is written + * before the head index. + */ + smp_wmb(); + wq->head = next; + } spin_unlock_irqrestore(&qp->r_rq.lock, flags); } return 0; -- cgit v0.10.2 From 06e0ffa69312ce33484bf5c63aa5fc576fde13a8 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:14:20 -0800 Subject: IB/hfi1: Re-factor MMU notification code The MMU notification code added to the expected receive side has been re-factored and split into its own file. This was done in order to make the code more general and, therefore, usable by other parts of the driver. The caching behavior remains the same. However, the handling of the RB tree (insertions, deletions, and searching) as well as the MMU invalidation processing is now handled by functions in the mmu_rb.[ch] files.
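As a quick orientation to the new interface, the sketch below shows how a client is expected to hook into it. The names my_node, my_compare, my_invalidate, my_ops, my_root and my_init are hypothetical client identifiers; struct mmu_rb_node, struct mmu_rb_ops and hfi1_mmu_rb_register() come from the mmu_rb.h added in the diff below, which requires the compare and invalidate callbacks and treats insert and remove as optional.

#include "mmu_rb.h"	/* added by this patch */

/* A client embeds struct mmu_rb_node in its own tracking structure. */
struct my_node {
	struct mmu_rb_node mmu;	/* ->addr and ->len filled in by the client */
	/* client-private state follows */
};

/* Range comparator used by insert and search: <0 below, >0 above, 0 overlap. */
static int my_compare(struct mmu_rb_node *node, unsigned long addr,
		      unsigned long len)
{
	if (addr + len <= node->addr)
		return -1;
	if (addr >= node->addr + node->len)
		return 1;
	return 0;
}

/* Called on MMU invalidation; a non-zero return removes the node. */
static int my_invalidate(struct rb_root *root, struct mmu_rb_node *node)
{
	return 1;
}

static struct mmu_rb_ops my_ops = {
	.compare = my_compare,		/* required */
	.invalidate = my_invalidate,	/* required */
	/* .insert and .remove callbacks are optional */
};

static struct rb_root my_root = RB_ROOT;

static int my_init(void)
{
	/* Registers an MMU notifier on current->mm behind the scenes. */
	return hfi1_mmu_rb_register(&my_root, &my_ops);
}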
Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile index 9b11706..8dc5938 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/staging/rdma/hfi1/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \ eprom.o file_ops.o firmware.o \ - init.o intr.o mad.o pcie.o pio.o pio_copy.o platform.o \ + init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ verbs_txreq.o diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index e4490ae..e460261 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -58,6 +58,7 @@ #include "user_exp_rcv.h" #include "eprom.h" #include "aspm.h" +#include "mmu_rb.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 5722883..78c8e24 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1179,6 +1179,7 @@ struct hfi1_devdata { #define PT_EAGER 1 #define PT_INVALID 2 +struct tid_rb_node; struct mmu_rb_node; /* Private data for file operations */ @@ -1189,20 +1190,17 @@ struct hfi1_filedata { struct hfi1_user_sdma_pkt_q *pq; /* for cpu affinity; -1 if none */ int rec_cpu_num; - struct mmu_notifier mn; struct rb_root tid_rb_root; - struct mmu_rb_node **entry_to_rb; + struct tid_rb_node **entry_to_rb; spinlock_t tid_lock; /* protect tid_[limit,used] counters */ u32 tid_limit; u32 tid_used; - spinlock_t rb_lock; /* protect tid_rb_root RB tree */ u32 *invalid_tids; u32 invalid_tid_idx; - spinlock_t invalid_lock; /* protect the invalid_tids array */ - int (*mmu_rb_insert)(struct hfi1_filedata *, struct rb_root *, - struct mmu_rb_node *); - void (*mmu_rb_remove)(struct hfi1_filedata *, struct rb_root *, - struct mmu_rb_node *); + /* protect invalid_tids array and invalid_tid_idx */ + spinlock_t invalid_lock; + int (*mmu_rb_insert)(struct rb_root *, struct mmu_rb_node *); + void (*mmu_rb_remove)(struct rb_root *, struct mmu_rb_node *); }; extern struct list_head hfi1_dev_list; diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c new file mode 100644 index 0000000..779ebaf --- /dev/null +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -0,0 +1,304 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include +#include +#include + +#include "mmu_rb.h" +#include "trace.h" + +struct mmu_rb_handler { + struct list_head list; + struct mmu_notifier mn; + struct rb_root *root; + spinlock_t lock; /* protect the RB tree */ + struct mmu_rb_ops *ops; +}; + +static LIST_HEAD(mmu_rb_handlers); +static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */ + +static struct mmu_rb_handler *find_mmu_handler(struct rb_root *); +static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, + unsigned long); +static inline void mmu_notifier_range_start(struct mmu_notifier *, + struct mm_struct *, + unsigned long, unsigned long); +static void mmu_notifier_mem_invalidate(struct mmu_notifier *, + unsigned long, unsigned long); +static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, + unsigned long, unsigned long); + +static struct mmu_notifier_ops mn_opts = { + .invalidate_page = mmu_notifier_page, + .invalidate_range_start = mmu_notifier_range_start, +}; + +int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) +{ + struct mmu_rb_handler *handlr; + + if (!ops->compare || !ops->invalidate) + return -EINVAL; + + handlr = kmalloc(sizeof(*handlr), GFP_KERNEL); + if (!handlr) + return -ENOMEM; + + handlr->root = root; + handlr->ops = ops; + INIT_HLIST_NODE(&handlr->mn.hlist); + spin_lock_init(&handlr->lock); + handlr->mn.ops = &mn_opts; + spin_lock(&mmu_rb_lock); + list_add_tail(&handlr->list, &mmu_rb_handlers); + spin_unlock(&mmu_rb_lock); + + return mmu_notifier_register(&handlr->mn, current->mm); +} + +void hfi1_mmu_rb_unregister(struct rb_root *root) +{ + struct mmu_rb_handler *handler = find_mmu_handler(root); + + spin_lock(&mmu_rb_lock); + list_del(&handler->list); + spin_unlock(&mmu_rb_lock); + + if (!RB_EMPTY_ROOT(root)) { + struct rb_node *node; + struct mmu_rb_node *rbnode; + + while ((node = rb_first(root))) { + rbnode = rb_entry(node, struct mmu_rb_node, node); + if (handler->ops->remove) + handler->ops->remove(root, 
rbnode); + rb_erase(node, root); + kfree(rbnode); + } + } + + if (current->mm) + mmu_notifier_unregister(&handler->mn, current->mm); + kfree(handler); +} + +int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) +{ + struct rb_node **new, *parent = NULL; + struct mmu_rb_handler *handler = find_mmu_handler(root); + struct mmu_rb_node *this; + int res, ret = 0; + + if (!handler) + return -EINVAL; + + new = &handler->root->rb_node; + spin_lock(&handler->lock); + while (*new) { + this = container_of(*new, struct mmu_rb_node, node); + res = handler->ops->compare(this, mnode->addr, mnode->len); + parent = *new; + + if (res < 0) { + new = &((*new)->rb_left); + } else if (res > 0) { + new = &((*new)->rb_right); + } else { + ret = 1; + goto unlock; + } + } + + if (handler->ops->insert) { + ret = handler->ops->insert(root, mnode); + if (ret) + goto unlock; + } + + rb_link_node(&mnode->node, parent, new); + rb_insert_color(&mnode->node, root); +unlock: + spin_unlock(&handler->lock); + return ret; +} + +/* Caller must host handler lock */ +static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, + unsigned long addr, + unsigned long len) +{ + struct rb_node *node = handler->root->rb_node; + struct mmu_rb_node *mnode; + int res; + + while (node) { + mnode = container_of(node, struct mmu_rb_node, node); + res = handler->ops->compare(mnode, addr, len); + + if (res < 0) + node = node->rb_left; + else if (res > 0) + node = node->rb_right; + else + return mnode; + } + return NULL; +} + +static void __mmu_rb_remove(struct mmu_rb_handler *handler, + struct mmu_rb_node *node) +{ + /* Validity of handler and node pointers has been checked by caller. */ + if (handler->ops->remove) + handler->ops->remove(handler->root, node); + rb_erase(&node->node, handler->root); +} + +struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, + unsigned long len) +{ + struct mmu_rb_handler *handler = find_mmu_handler(root); + struct mmu_rb_node *node; + + if (!handler) + return ERR_PTR(-EINVAL); + + spin_lock(&handler->lock); + node = __mmu_rb_search(handler, addr, len); + spin_unlock(&handler->lock); + + return node; +} + +void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) +{ + struct mmu_rb_handler *handler = find_mmu_handler(root); + + if (!handler || !node) + return; + + spin_lock(&handler->lock); + __mmu_rb_remove(handler, node); + spin_unlock(&handler->lock); +} + +static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) +{ + struct mmu_rb_handler *handler; + + spin_lock(&mmu_rb_lock); + list_for_each_entry(handler, &mmu_rb_handlers, list) { + if (handler->root == root) + goto unlock; + } + handler = NULL; +unlock: + spin_unlock(&mmu_rb_lock); + return handler; +} + +static inline void mmu_notifier_page(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long addr) +{ + mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE); +} + +static inline void mmu_notifier_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + mmu_notifier_mem_invalidate(mn, start, end); +} + +static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, + unsigned long start, unsigned long end) +{ + struct mmu_rb_handler *handler = + container_of(mn, struct mmu_rb_handler, mn); + struct rb_root *root = handler->root; + struct mmu_rb_node *node; + unsigned long addr = start; + + spin_lock(&handler->lock); + while (addr < end) { + /* + * There is no good way to provide a reasonable length to the + 
* search function at this point. Using the remaining length in + * the invalidation range is not the right thing to do. + * We have to rely on the fact that the insertion algorithm + * takes care of any overlap or length restrictions by using the + * actual size of each node. Therefore, we can use a page as an + * arbitrary, non-zero value. + */ + node = __mmu_rb_search(handler, addr, PAGE_SIZE); + + if (!node) { + /* + * Didn't find a node at this address. However, the + * range could be bigger than what we have registered + * so we have to keep looking. + */ + addr += PAGE_SIZE; + continue; + } + if (handler->ops->invalidate(root, node)) + __mmu_rb_remove(handler, node); + + /* + * The next address to be looked up is computed based + * on the node's starting address. This is due to the + * fact that the range where we start might be in the + * middle of the node's buffer so simply incrementing + * the address by the node's size would result is a + * bad address. + */ + addr = node->addr + node->len; + } + spin_unlock(&handler->lock); +} diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h new file mode 100644 index 0000000..9fe1076 --- /dev/null +++ b/drivers/staging/rdma/hfi1/mmu_rb.h @@ -0,0 +1,73 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#ifndef _HFI1_MMU_RB_H +#define _HFI1_MMU_RB_H + +#include "hfi.h" + +struct mmu_rb_node { + struct rb_node node; + unsigned long addr; + unsigned long len; +}; + +struct mmu_rb_ops { + int (*compare)(struct mmu_rb_node *, unsigned long, + unsigned long); + int (*insert)(struct rb_root *, struct mmu_rb_node *); + void (*remove)(struct rb_root *, struct mmu_rb_node *); + int (*invalidate)(struct rb_root *, struct mmu_rb_node *); +}; + +int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops); +void hfi1_mmu_rb_unregister(struct rb_root *); +int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); +void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *); +struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long, + unsigned long); + +#endif /* _HFI1_MMU_RB_H */ diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index fccae50..c9e05dd 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -48,6 +48,7 @@ #include "user_exp_rcv.h" #include "trace.h" +#include "mmu_rb.h" struct tid_group { struct list_head list; @@ -57,11 +58,9 @@ struct tid_group { u8 map; }; -struct mmu_rb_node { - struct rb_node rbnode; - unsigned long virt; +struct tid_rb_node { + struct mmu_rb_node mmu; unsigned long phys; - unsigned long len; struct tid_group *grp; u32 rcventry; dma_addr_t dma_addr; @@ -70,16 +69,6 @@ struct mmu_rb_node { struct page *pages[0]; }; -enum mmu_call_types { - MMU_INVALIDATE_PAGE = 0, - MMU_INVALIDATE_RANGE = 1 -}; - -static const char * const mmu_types[] = { - "PAGE", - "RANGE" -}; - struct tid_pageset { u16 idx; u16 count; @@ -99,28 +88,21 @@ static int set_rcvarray_entry(struct file *, unsigned long, u32, struct tid_group *, struct page **, unsigned); static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, unsigned long); -static struct mmu_rb_node *mmu_rb_search(struct rb_root *, unsigned long); -static int mmu_rb_insert_by_addr(struct hfi1_filedata *, struct rb_root *, - struct mmu_rb_node *); -static int mmu_rb_insert_by_entry(struct hfi1_filedata *, struct rb_root *, - struct mmu_rb_node *); -static void mmu_rb_remove_by_addr(struct hfi1_filedata *, struct rb_root *, - struct mmu_rb_node *); -static void mmu_rb_remove_by_entry(struct hfi1_filedata *, struct rb_root *, - struct mmu_rb_node *); -static void mmu_notifier_mem_invalidate(struct mmu_notifier *, - unsigned long, unsigned long, - enum mmu_call_types); -static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, - unsigned long); -static inline void mmu_notifier_range_start(struct mmu_notifier *, - struct mm_struct *, - unsigned long, unsigned long); +static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); +static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *); +static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); static int program_rcvarray(struct file *, unsigned long, struct tid_group *, struct tid_pageset *, unsigned, u16, struct page **, u32 *, unsigned *, unsigned *); static int unprogram_rcvarray(struct file *, u32, struct tid_group **); -static void clear_tid_node(struct hfi1_filedata *, u16, struct mmu_rb_node *); +static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *); + +static struct mmu_rb_ops tid_rb_ops = { + .compare = mmu_addr_cmp, + .insert = mmu_rb_insert, + .remove = mmu_rb_remove, + .invalidate = mmu_rb_invalidate +}; static inline u32 rcventry2tidinfo(u32 rcventry) { @@ 
-167,11 +149,6 @@ static inline void tid_group_move(struct tid_group *group, tid_group_add_tail(group, s2); } -static struct mmu_notifier_ops mn_opts = { - .invalidate_page = mmu_notifier_page, - .invalidate_range_start = mmu_notifier_range_start, -}; - /* * Initialize context and file private data needed for Expected * receive caching. This needs to be done after the context has @@ -185,11 +162,8 @@ int hfi1_user_exp_rcv_init(struct file *fp) unsigned tidbase; int i, ret = 0; - INIT_HLIST_NODE(&fd->mn.hlist); - spin_lock_init(&fd->rb_lock); spin_lock_init(&fd->tid_lock); spin_lock_init(&fd->invalid_lock); - fd->mn.ops = &mn_opts; fd->tid_rb_root = RB_ROOT; if (!uctxt->subctxt_cnt || !fd->subctxt) { @@ -239,7 +213,7 @@ int hfi1_user_exp_rcv_init(struct file *fp) * fails, continue but turn off the TID caching for * all user contexts. */ - ret = mmu_notifier_register(&fd->mn, current->mm); + ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops); if (ret) { dd_dev_info(dd, "Failed MMU notifier registration %d\n", @@ -250,11 +224,11 @@ int hfi1_user_exp_rcv_init(struct file *fp) } if (HFI1_CAP_IS_USET(TID_UNMAP)) { - fd->mmu_rb_insert = mmu_rb_insert_by_entry; - fd->mmu_rb_remove = mmu_rb_remove_by_entry; + fd->mmu_rb_insert = mmu_rb_insert; + fd->mmu_rb_remove = mmu_rb_remove; } else { - fd->mmu_rb_insert = mmu_rb_insert_by_addr; - fd->mmu_rb_remove = mmu_rb_remove_by_addr; + fd->mmu_rb_insert = hfi1_mmu_rb_insert; + fd->mmu_rb_remove = hfi1_mmu_rb_remove; } /* @@ -295,8 +269,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) * The notifier would have been removed when the process'es mm * was freed. */ - if (current->mm && !HFI1_CAP_IS_USET(TID_UNMAP)) - mmu_notifier_unregister(&fd->mn, current->mm); + if (!HFI1_CAP_IS_USET(TID_UNMAP)) + hfi1_mmu_rb_unregister(&fd->tid_rb_root); kfree(fd->invalid_tids); @@ -312,19 +286,6 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) list_del_init(&grp->list); kfree(grp); } - spin_lock(&fd->rb_lock); - if (!RB_EMPTY_ROOT(&fd->tid_rb_root)) { - struct rb_node *node; - struct mmu_rb_node *rbnode; - - while ((node = rb_first(&fd->tid_rb_root))) { - rbnode = rb_entry(node, struct mmu_rb_node, - rbnode); - rb_erase(&rbnode->rbnode, &fd->tid_rb_root); - kfree(rbnode); - } - } - spin_unlock(&fd->rb_lock); hfi1_clear_tids(uctxt); } @@ -866,7 +827,7 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, int ret; struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct mmu_rb_node *node; + struct tid_rb_node *node; struct hfi1_devdata *dd = uctxt->dd; struct rb_root *root = &fd->tid_rb_root; dma_addr_t phys; @@ -890,9 +851,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, return -EFAULT; } - node->virt = vaddr; + node->mmu.addr = vaddr; + node->mmu.len = npages * PAGE_SIZE; node->phys = page_to_phys(pages[0]); - node->len = npages * PAGE_SIZE; node->npages = npages; node->rcventry = rcventry; node->dma_addr = phys; @@ -900,21 +861,19 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, node->freed = false; memcpy(node->pages, pages, sizeof(struct page *) * npages); - spin_lock(&fd->rb_lock); - ret = fd->mmu_rb_insert(fd, root, node); - spin_unlock(&fd->rb_lock); + ret = fd->mmu_rb_insert(root, &node->mmu); if (ret) { hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", - node->rcventry, node->virt, node->phys, ret); + node->rcventry, node->mmu.addr, node->phys, ret); pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); 
kfree(node); return -EFAULT; } hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); - trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, - npages, node->virt, node->phys, phys); + trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, + node->mmu.addr, node->phys, phys); return 0; } @@ -924,7 +883,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; - struct mmu_rb_node *node; + struct tid_rb_node *node; u8 tidctrl = EXP_TID_GET(tidinfo, CTRL); u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry; @@ -939,14 +898,11 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, rcventry = tididx + (tidctrl - 1); - spin_lock(&fd->rb_lock); node = fd->entry_to_rb[rcventry]; - if (!node || node->rcventry != (uctxt->expected_base + rcventry)) { - spin_unlock(&fd->rb_lock); + if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; - } - fd->mmu_rb_remove(fd, &fd->tid_rb_root, node); - spin_unlock(&fd->rb_lock); + fd->mmu_rb_remove(&fd->tid_rb_root, &node->mmu); + if (grp) *grp = node->grp; clear_tid_node(fd, fd->subctxt, node); @@ -954,13 +910,13 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, } static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, - struct mmu_rb_node *node) + struct tid_rb_node *node) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, - node->npages, node->virt, node->phys, + node->npages, node->mmu.addr, node->phys, node->dma_addr); hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0); @@ -970,7 +926,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, */ flush_wc(); - pci_unmap_single(dd->pcidev, node->dma_addr, node->len, + pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, PCI_DMA_FROMDEVICE); hfi1_release_user_pages(node->pages, node->npages, true); @@ -997,216 +953,96 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, list_for_each_entry_safe(grp, ptr, &set->list, list) { list_del_init(&grp->list); - spin_lock(&fd->rb_lock); for (i = 0; i < grp->size; i++) { if (grp->map & (1 << i)) { u16 rcventry = grp->base + i; - struct mmu_rb_node *node; + struct tid_rb_node *node; node = fd->entry_to_rb[rcventry - uctxt->expected_base]; if (!node || node->rcventry != rcventry) continue; - fd->mmu_rb_remove(fd, root, node); + fd->mmu_rb_remove(root, &node->mmu); clear_tid_node(fd, -1, node); } } - spin_unlock(&fd->rb_lock); } } -static inline void mmu_notifier_page(struct mmu_notifier *mn, - struct mm_struct *mm, unsigned long addr) -{ - mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE, - MMU_INVALIDATE_PAGE); -} - -static inline void mmu_notifier_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) { - mmu_notifier_mem_invalidate(mn, start, end, MMU_INVALIDATE_RANGE); -} + struct hfi1_filedata *fdata = + container_of(root, struct hfi1_filedata, tid_rb_root); + struct hfi1_ctxtdata *uctxt = fdata->uctxt; + struct tid_rb_node *node = + container_of(mnode, struct tid_rb_node, mmu); -static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, - unsigned long start, unsigned long end, - enum mmu_call_types type) -{ - struct hfi1_filedata *fd = container_of(mn, struct hfi1_filedata, mn); - struct 
hfi1_ctxtdata *uctxt = fd->uctxt; - struct rb_root *root = &fd->tid_rb_root; - struct mmu_rb_node *node; - unsigned long addr = start; + if (node->freed) + return 0; - trace_hfi1_mmu_invalidate(uctxt->ctxt, fd->subctxt, mmu_types[type], - start, end); + trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr, + node->rcventry, node->npages, node->dma_addr); + node->freed = true; - spin_lock(&fd->rb_lock); - while (addr < end) { - node = mmu_rb_search(root, addr); + spin_lock(&fdata->invalid_lock); + if (fdata->invalid_tid_idx < uctxt->expected_count) { + fdata->invalid_tids[fdata->invalid_tid_idx] = + rcventry2tidinfo(node->rcventry - uctxt->expected_base); + fdata->invalid_tids[fdata->invalid_tid_idx] |= + EXP_TID_SET(LEN, node->npages); + if (!fdata->invalid_tid_idx) { + unsigned long *ev; - if (!node) { /* - * Didn't find a node at this address. However, the - * range could be bigger than what we have registered - * so we have to keep looking. + * hfi1_set_uevent_bits() sets a user event flag + * for all processes. Because calling into the + * driver to process TID cache invalidations is + * expensive and TID cache invalidations are + * handled on a per-process basis, we can + * optimize this to set the flag only for the + * process in question. */ - addr += PAGE_SIZE; - continue; - } - - /* - * The next address to be looked up is computed based - * on the node's starting address. This is due to the - * fact that the range where we start might be in the - * middle of the node's buffer so simply incrementing - * the address by the node's size would result is a - * bad address. - */ - addr = node->virt + (node->npages * PAGE_SIZE); - if (node->freed) - continue; - - trace_hfi1_exp_tid_inval(uctxt->ctxt, fd->subctxt, node->virt, - node->rcventry, node->npages, - node->dma_addr); - node->freed = true; - - spin_lock(&fd->invalid_lock); - if (fd->invalid_tid_idx < uctxt->expected_count) { - fd->invalid_tids[fd->invalid_tid_idx] = - rcventry2tidinfo(node->rcventry - - uctxt->expected_base); - fd->invalid_tids[fd->invalid_tid_idx] |= - EXP_TID_SET(LEN, node->npages); - if (!fd->invalid_tid_idx) { - unsigned long *ev; - - /* - * hfi1_set_uevent_bits() sets a user event flag - * for all processes. Because calling into the - * driver to process TID cache invalidations is - * expensive and TID cache invalidations are - * handled on a per-process basis, we can - * optimize this to set the flag only for the - * process in question. 
- */ - ev = uctxt->dd->events + - (((uctxt->ctxt - - uctxt->dd->first_user_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fd->subctxt); - set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); - } - fd->invalid_tid_idx++; + ev = uctxt->dd->events + + (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * + HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); + set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); } - spin_unlock(&fd->invalid_lock); + fdata->invalid_tid_idx++; } - spin_unlock(&fd->rb_lock); + spin_unlock(&fdata->invalid_lock); + return 0; } -static inline int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr, - unsigned long len) +static int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr, + unsigned long len) { - if ((addr + len) <= node->virt) + if ((addr + len) <= node->addr) return -1; - else if (addr >= node->virt && addr < (node->virt + node->len)) + else if (addr >= node->addr && addr < (node->addr + node->len)) return 0; else return 1; } -static inline int mmu_entry_cmp(struct mmu_rb_node *node, u32 entry) -{ - if (entry < node->rcventry) - return -1; - else if (entry > node->rcventry) - return 1; - else - return 0; -} - -static struct mmu_rb_node *mmu_rb_search(struct rb_root *root, - unsigned long addr) -{ - struct rb_node *node = root->rb_node; - - while (node) { - struct mmu_rb_node *mnode = - container_of(node, struct mmu_rb_node, rbnode); - /* - * When searching, use at least one page length for size. The - * MMU notifier will not give us anything less than that. We - * also don't need anything more than a page because we are - * guaranteed to have non-overlapping buffers in the tree. - */ - int result = mmu_addr_cmp(mnode, addr, PAGE_SIZE); - - if (result < 0) - node = node->rb_left; - else if (result > 0) - node = node->rb_right; - else - return mnode; - } - return NULL; -} - -static int mmu_rb_insert_by_entry(struct hfi1_filedata *fdata, - struct rb_root *root, - struct mmu_rb_node *node) +static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) { + struct hfi1_filedata *fdata = + container_of(root, struct hfi1_filedata, tid_rb_root); + struct tid_rb_node *tnode = + container_of(node, struct tid_rb_node, mmu); u32 base = fdata->uctxt->expected_base; - fdata->entry_to_rb[node->rcventry - base] = node; + fdata->entry_to_rb[tnode->rcventry - base] = tnode; return 0; } -static int mmu_rb_insert_by_addr(struct hfi1_filedata *fdata, - struct rb_root *root, struct mmu_rb_node *node) -{ - struct rb_node **new = &root->rb_node, *parent = NULL; - u32 base = fdata->uctxt->expected_base; - - /* Figure out where to put new node */ - while (*new) { - struct mmu_rb_node *this = - container_of(*new, struct mmu_rb_node, rbnode); - int result = mmu_addr_cmp(this, node->virt, node->len); - - parent = *new; - if (result < 0) - new = &((*new)->rb_left); - else if (result > 0) - new = &((*new)->rb_right); - else - return 1; - } - - /* Add new node and rebalance tree. 
*/ - rb_link_node(&node->rbnode, parent, new); - rb_insert_color(&node->rbnode, root); - - fdata->entry_to_rb[node->rcventry - base] = node; - return 0; -} - -static void mmu_rb_remove_by_entry(struct hfi1_filedata *fdata, - struct rb_root *root, - struct mmu_rb_node *node) -{ - u32 base = fdata->uctxt->expected_base; - - fdata->entry_to_rb[node->rcventry - base] = NULL; -} - -static void mmu_rb_remove_by_addr(struct hfi1_filedata *fdata, - struct rb_root *root, - struct mmu_rb_node *node) +static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) { + struct hfi1_filedata *fdata = + container_of(root, struct hfi1_filedata, tid_rb_root); + struct tid_rb_node *tnode = + container_of(node, struct tid_rb_node, mmu); u32 base = fdata->uctxt->expected_base; - fdata->entry_to_rb[node->rcventry - base] = NULL; - rb_erase(&node->rbnode, root); + fdata->entry_to_rb[tnode->rcventry - base] = NULL; } -- cgit v0.10.2 From c81e1f6452406a633b7c4ea3e0a12e5deaf57f5c Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:14:25 -0800 Subject: IB/hfi1: Allow MMU function execution in IRQ context Future users of the MMU/RB functions might be searching or manipulating the MMU RB trees in interrupt context. Therefore, the MMU/RB functions need to be able to run in interrupt context. This requires that we use the IRQ-aware API for spin locks. Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index 779ebaf..648f7e0 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -81,6 +81,7 @@ static struct mmu_notifier_ops mn_opts = { int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) { struct mmu_rb_handler *handlr; + unsigned long flags; if (!ops->compare || !ops->invalidate) return -EINVAL; @@ -94,9 +95,9 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) INIT_HLIST_NODE(&handlr->mn.hlist); spin_lock_init(&handlr->lock); handlr->mn.ops = &mn_opts; - spin_lock(&mmu_rb_lock); + spin_lock_irqsave(&mmu_rb_lock, flags); list_add_tail(&handlr->list, &mmu_rb_handlers); - spin_unlock(&mmu_rb_lock); + spin_unlock_irqrestore(&mmu_rb_lock, flags); return mmu_notifier_register(&handlr->mn, current->mm); } @@ -104,10 +105,11 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) void hfi1_mmu_rb_unregister(struct rb_root *root) { struct mmu_rb_handler *handler = find_mmu_handler(root); + unsigned long flags; - spin_lock(&mmu_rb_lock); + spin_lock_irqsave(&mmu_rb_lock, flags); list_del(&handler->list); - spin_unlock(&mmu_rb_lock); + spin_unlock_irqrestore(&mmu_rb_lock, flags); if (!RB_EMPTY_ROOT(root)) { struct rb_node *node; @@ -132,13 +134,14 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) struct rb_node **new, *parent = NULL; struct mmu_rb_handler *handler = find_mmu_handler(root); struct mmu_rb_node *this; + unsigned long flags; int res, ret = 0; if (!handler) return -EINVAL; new = &handler->root->rb_node; - spin_lock(&handler->lock); + spin_lock_irqsave(&handler->lock, flags); while (*new) { this = container_of(*new, struct mmu_rb_node, node); res = handler->ops->compare(this, mnode->addr, mnode->len); @@ -163,7 +166,7 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) rb_link_node(&mnode->node, parent, new); rb_insert_color(&mnode->node, root); unlock: - 
spin_unlock(&handler->lock); + spin_unlock_irqrestore(&handler->lock, flags); return ret; } @@ -204,13 +207,14 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, { struct mmu_rb_handler *handler = find_mmu_handler(root); struct mmu_rb_node *node; + unsigned long flags; if (!handler) return ERR_PTR(-EINVAL); - spin_lock(&handler->lock); + spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, addr, len); - spin_unlock(&handler->lock); + spin_unlock_irqrestore(&handler->lock, flags); return node; } @@ -218,27 +222,29 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) { struct mmu_rb_handler *handler = find_mmu_handler(root); + unsigned long flags; if (!handler || !node) return; - spin_lock(&handler->lock); + spin_lock_irqsave(&handler->lock, flags); __mmu_rb_remove(handler, node); - spin_unlock(&handler->lock); + spin_unlock_irqrestore(&handler->lock, flags); } static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) { struct mmu_rb_handler *handler; + unsigned long flags; - spin_lock(&mmu_rb_lock); + spin_lock_irqsave(&mmu_rb_lock, flags); list_for_each_entry(handler, &mmu_rb_handlers, list) { if (handler->root == root) goto unlock; } handler = NULL; unlock: - spin_unlock(&mmu_rb_lock); + spin_unlock_irqrestore(&mmu_rb_lock, flags); return handler; } @@ -263,9 +269,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, container_of(mn, struct mmu_rb_handler, mn); struct rb_root *root = handler->root; struct mmu_rb_node *node; - unsigned long addr = start; + unsigned long addr = start, flags; - spin_lock(&handler->lock); + spin_lock_irqsave(&handler->lock, flags); while (addr < end) { /* * There is no good way to provide a reasonable length to the @@ -300,5 +306,5 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, */ addr = node->addr + node->len; } - spin_unlock(&handler->lock); + spin_unlock_irqrestore(&handler->lock, flags); } -- cgit v0.10.2 From 4b00d9490f2147d29f107f36391b0cc77bcd944f Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:14:31 -0800 Subject: IB/hfi1: Prevent NULL pointer dereference Prevent a potential NULL pointer dereference (found by code inspection) when unregistering an MMU handler. Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index 648f7e0..f42a33b 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -107,6 +107,9 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) struct mmu_rb_handler *handler = find_mmu_handler(root); unsigned long flags; + if (!handler) + return; + spin_lock_irqsave(&mmu_rb_lock, flags); list_del(&handler->list); spin_unlock_irqrestore(&mmu_rb_lock, flags); -- cgit v0.10.2 From eef9c896a94e715fcf8eb41e98b2469319641c73 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:14:36 -0800 Subject: IB/hfi1: Allow remove MMU callbacks to free nodes In order to allow the remove MMU callbacks to free the RB nodes, it is necessary to prevent any references to the nodes after the remove callback has been called. Therefore, remove the node from the tree prior to calling the callback. 
In other words, the MMU/RB API now guarantees that all RB node operations it performs will be done prior to calling the remove callback and that the RB node will not be touched afterwards. Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index f42a33b..a3515d7 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -120,10 +120,9 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) while ((node = rb_first(root))) { rbnode = rb_entry(node, struct mmu_rb_node, node); + rb_erase(node, root); if (handler->ops->remove) handler->ops->remove(root, rbnode); - rb_erase(node, root); - kfree(rbnode); } } @@ -200,9 +199,9 @@ static void __mmu_rb_remove(struct mmu_rb_handler *handler, struct mmu_rb_node *node) { /* Validity of handler and node pointers has been checked by caller. */ + rb_erase(&node->node, handler->root); if (handler->ops->remove) handler->ops->remove(handler->root, node); - rb_erase(&node->node, handler->root); } struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, @@ -272,7 +271,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, container_of(mn, struct mmu_rb_handler, mn); struct rb_root *root = handler->root; struct mmu_rb_node *node; - unsigned long addr = start, flags; + unsigned long addr = start, naddr, nlen, flags; spin_lock_irqsave(&handler->lock, flags); while (addr < end) { @@ -296,6 +295,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, addr += PAGE_SIZE; continue; } + + naddr = node->addr; + nlen = node->len; if (handler->ops->invalidate(root, node)) __mmu_rb_remove(handler, node); @@ -307,7 +309,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, * the address by the node's size would result is a * bad address. */ - addr = node->addr + node->len; + addr = naddr + nlen; } spin_unlock_irqrestore(&handler->lock, flags); } -- cgit v0.10.2 From 368f2b59d024fbb58015dfd0e09c54c424cda979 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:14:42 -0800 Subject: IB/hfi1: Remove the use of add/remove RB function pointers The usage of function pointers for RB node insertion and removal in the expected receive code path was meant to be a small performance optimization. However, maintaining it, especially with the new MMU API, would become more troublesome as the API is extended. Since the performance optimization is minor, remove the function pointers and replace with direct calls. 
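For reference, each former indirect call becomes an explicit capability check at its call site. A minimal sketch of the pattern (the hunks below open-code exactly this in the insert and remove paths):

	/* TID_UNMAP contexts do not use the MMU-notifier-backed tree. */
	if (HFI1_CAP_IS_USET(TID_UNMAP))
		ret = mmu_rb_insert(root, &node->mmu);
	else
		ret = hfi1_mmu_rb_insert(root, &node->mmu);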
Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 78c8e24..2107cdc 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1199,8 +1199,6 @@ struct hfi1_filedata { u32 invalid_tid_idx; /* protect invalid_tids array and invalid_tid_idx */ spinlock_t invalid_lock; - int (*mmu_rb_insert)(struct rb_root *, struct mmu_rb_node *); - void (*mmu_rb_remove)(struct rb_root *, struct mmu_rb_node *); }; extern struct list_head hfi1_dev_list; diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index c9e05dd..b0b193f 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -223,14 +223,6 @@ int hfi1_user_exp_rcv_init(struct file *fp) } } - if (HFI1_CAP_IS_USET(TID_UNMAP)) { - fd->mmu_rb_insert = mmu_rb_insert; - fd->mmu_rb_remove = mmu_rb_remove; - } else { - fd->mmu_rb_insert = hfi1_mmu_rb_insert; - fd->mmu_rb_remove = hfi1_mmu_rb_remove; - } - /* * PSM does not have a good way to separate, count, and * effectively enforce a limit on RcvArray entries used by @@ -861,7 +853,10 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, node->freed = false; memcpy(node->pages, pages, sizeof(struct page *) * npages); - ret = fd->mmu_rb_insert(root, &node->mmu); + if (HFI1_CAP_IS_USET(TID_UNMAP)) + ret = mmu_rb_insert(root, &node->mmu); + else + ret = hfi1_mmu_rb_insert(root, &node->mmu); if (ret) { hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", @@ -901,7 +896,10 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, node = fd->entry_to_rb[rcventry]; if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; - fd->mmu_rb_remove(&fd->tid_rb_root, &node->mmu); + if (HFI1_CAP_IS_USET(TID_UNMAP)) + mmu_rb_remove(&fd->tid_rb_root, &node->mmu); + else + hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); if (grp) *grp = node->grp; @@ -962,7 +960,12 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, uctxt->expected_base]; if (!node || node->rcventry != rcventry) continue; - fd->mmu_rb_remove(root, &node->mmu); + if (HFI1_CAP_IS_USET(TID_UNMAP)) + mmu_rb_remove(&fd->tid_rb_root, + &node->mmu); + else + hfi1_mmu_rb_remove(&fd->tid_rb_root, + &node->mmu); clear_tid_node(fd, -1, node); } } -- cgit v0.10.2 From 909e2cd004b639276678c195760efeea6c173626 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:14:48 -0800 Subject: IB/hfi1: Notify remove MMU/RB callback of calling context Tell the remove MMU/RB callback if it's being called as part of a memory invalidation or not. This can be important in preventing a deadlock if the remove callback attempts to take the map_sem semaphore because the kernel's MMU invalidation functions have already taken it. 
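A client's remove callback can then branch on the new flag. A minimal sketch (example_rb_remove() and release_node_pages() are hypothetical names; the SDMA cache added later in this series follows the same pattern):

	static void example_rb_remove(struct rb_root *root,
				      struct mmu_rb_node *mnode, bool notifier)
	{
		/*
		 * In MMU-notifier context the kernel already holds mmap_sem,
		 * so skip (or defer) any work that would take it again.
		 */
		if (!notifier)
			release_node_pages(mnode);
	}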
Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index a3515d7..29d6d3e 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -122,7 +122,7 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) rbnode = rb_entry(node, struct mmu_rb_node, node); rb_erase(node, root); if (handler->ops->remove) - handler->ops->remove(root, rbnode); + handler->ops->remove(root, rbnode, false); } } @@ -196,12 +196,12 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, } static void __mmu_rb_remove(struct mmu_rb_handler *handler, - struct mmu_rb_node *node) + struct mmu_rb_node *node, bool arg) { /* Validity of handler and node pointers has been checked by caller. */ rb_erase(&node->node, handler->root); if (handler->ops->remove) - handler->ops->remove(handler->root, node); + handler->ops->remove(handler->root, node, arg); } struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, @@ -230,7 +230,7 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) return; spin_lock_irqsave(&handler->lock, flags); - __mmu_rb_remove(handler, node); + __mmu_rb_remove(handler, node, false); spin_unlock_irqrestore(&handler->lock, flags); } @@ -299,7 +299,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, naddr = node->addr; nlen = node->len; if (handler->ops->invalidate(root, node)) - __mmu_rb_remove(handler, node); + __mmu_rb_remove(handler, node, true); /* * The next address to be looked up is computed based diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h index 9fe1076..fdd9787 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.h +++ b/drivers/staging/rdma/hfi1/mmu_rb.h @@ -59,7 +59,7 @@ struct mmu_rb_ops { int (*compare)(struct mmu_rb_node *, unsigned long, unsigned long); int (*insert)(struct rb_root *, struct mmu_rb_node *); - void (*remove)(struct rb_root *, struct mmu_rb_node *); + void (*remove)(struct rb_root *, struct mmu_rb_node *, bool); int (*invalidate)(struct rb_root *, struct mmu_rb_node *); }; diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index b0b193f..1d971c0 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -89,7 +89,7 @@ static int set_rcvarray_entry(struct file *, unsigned long, u32, static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, unsigned long); static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); -static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *); +static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); static int program_rcvarray(struct file *, unsigned long, struct tid_group *, struct tid_pageset *, unsigned, u16, struct page **, @@ -897,7 +897,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; if (HFI1_CAP_IS_USET(TID_UNMAP)) - mmu_rb_remove(&fd->tid_rb_root, &node->mmu); + mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false); else hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); @@ -962,7 +962,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, continue; if (HFI1_CAP_IS_USET(TID_UNMAP)) mmu_rb_remove(&fd->tid_rb_root, - &node->mmu); + 
&node->mmu, false); else hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); @@ -1039,7 +1039,8 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) return 0; } -static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) +static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node, + bool notifier) { struct hfi1_filedata *fdata = container_of(root, struct hfi1_filedata, tid_rb_root); -- cgit v0.10.2 From df5a00f81dab36b3479a2b84c836e98e701c78bc Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:14:53 -0800 Subject: IB/hfi1: Use interval RB trees The interval RB trees can handle RB nodes which hold ranged information. This is exactly the usage for the buffer cache implemented in the expected receive code path. Convert the MMU/RB functions to use the interval RB tree API. This will help with future users of the caching API, as well. Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index 29d6d3e..540e267 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -46,7 +46,7 @@ */ #include #include -#include +#include #include "mmu_rb.h" #include "trace.h" @@ -62,6 +62,8 @@ struct mmu_rb_handler { static LIST_HEAD(mmu_rb_handlers); static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */ +static unsigned long mmu_node_start(struct mmu_rb_node *); +static unsigned long mmu_node_last(struct mmu_rb_node *); static struct mmu_rb_handler *find_mmu_handler(struct rb_root *); static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, unsigned long); @@ -78,6 +80,19 @@ static struct mmu_notifier_ops mn_opts = { .invalidate_range_start = mmu_notifier_range_start, }; +INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last, + mmu_node_start, mmu_node_last, static, __mmu_int_rb); + +static unsigned long mmu_node_start(struct mmu_rb_node *node) +{ + return node->addr & PAGE_MASK; +} + +static unsigned long mmu_node_last(struct mmu_rb_node *node) +{ + return ((node->addr & PAGE_MASK) + node->len); +} + int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) { struct mmu_rb_handler *handlr; @@ -133,40 +148,27 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) { - struct rb_node **new, *parent = NULL; struct mmu_rb_handler *handler = find_mmu_handler(root); - struct mmu_rb_node *this; + struct mmu_rb_node *node; unsigned long flags; - int res, ret = 0; + int ret = 0; if (!handler) return -EINVAL; - new = &handler->root->rb_node; spin_lock_irqsave(&handler->lock, flags); - while (*new) { - this = container_of(*new, struct mmu_rb_node, node); - res = handler->ops->compare(this, mnode->addr, mnode->len); - parent = *new; - - if (res < 0) { - new = &((*new)->rb_left); - } else if (res > 0) { - new = &((*new)->rb_right); - } else { - ret = 1; - goto unlock; - } + node = __mmu_rb_search(handler, mnode->addr, mnode->len); + if (node) { + ret = -EINVAL; + goto unlock; } + __mmu_int_rb_insert(mnode, root); if (handler->ops->insert) { ret = handler->ops->insert(root, mnode); if (ret) - goto unlock; + __mmu_int_rb_remove(mnode, root); } - - rb_link_node(&mnode->node, parent, new); - rb_insert_color(&mnode->node, root); unlock: spin_unlock_irqrestore(&handler->lock, flags); return ret; @@ -177,29 +179,17 @@ 
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, unsigned long addr, unsigned long len) { - struct rb_node *node = handler->root->rb_node; - struct mmu_rb_node *mnode; - int res; - - while (node) { - mnode = container_of(node, struct mmu_rb_node, node); - res = handler->ops->compare(mnode, addr, len); - - if (res < 0) - node = node->rb_left; - else if (res > 0) - node = node->rb_right; - else - return mnode; - } - return NULL; + struct mmu_rb_node *node; + + node = __mmu_int_rb_iter_first(handler->root, addr, len); + return node; } static void __mmu_rb_remove(struct mmu_rb_handler *handler, struct mmu_rb_node *node, bool arg) { /* Validity of handler and node pointers has been checked by caller. */ - rb_erase(&node->node, handler->root); + __mmu_int_rb_remove(node, handler->root); if (handler->ops->remove) handler->ops->remove(handler->root, node, arg); } @@ -271,45 +261,13 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, container_of(mn, struct mmu_rb_handler, mn); struct rb_root *root = handler->root; struct mmu_rb_node *node; - unsigned long addr = start, naddr, nlen, flags; + unsigned long flags; spin_lock_irqsave(&handler->lock, flags); - while (addr < end) { - /* - * There is no good way to provide a reasonable length to the - * search function at this point. Using the remaining length in - * the invalidation range is not the right thing to do. - * We have to rely on the fact that the insertion algorithm - * takes care of any overlap or length restrictions by using the - * actual size of each node. Therefore, we can use a page as an - * arbitrary, non-zero value. - */ - node = __mmu_rb_search(handler, addr, PAGE_SIZE); - - if (!node) { - /* - * Didn't find a node at this address. However, the - * range could be bigger than what we have registered - * so we have to keep looking. - */ - addr += PAGE_SIZE; - continue; - } - - naddr = node->addr; - nlen = node->len; + for (node = __mmu_int_rb_iter_first(root, start, end); node; + node = __mmu_int_rb_iter_next(node, start, end)) { if (handler->ops->invalidate(root, node)) __mmu_rb_remove(handler, node, true); - - /* - * The next address to be looked up is computed based - * on the node's starting address. This is due to the - * fact that the range where we start might be in the - * middle of the node's buffer so simply incrementing - * the address by the node's size would result is a - * bad address. - */ - addr = naddr + nlen; } spin_unlock_irqrestore(&handler->lock, flags); } diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h index fdd9787..abed3a6 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.h +++ b/drivers/staging/rdma/hfi1/mmu_rb.h @@ -50,9 +50,10 @@ #include "hfi.h" struct mmu_rb_node { - struct rb_node node; unsigned long addr; unsigned long len; + unsigned long __last; + struct rb_node node; }; struct mmu_rb_ops { -- cgit v0.10.2 From 353b71c7c08ed75fe83843a382e5ca53376d07ca Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:14:59 -0800 Subject: IB/hfi1: Add MMU tracing Add a new tracepoint type for the MMU functions and calls to that tracepoint to allow tracing of MMU functionality. 
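Usage mirrors the existing trace classes; for example (as in the hunks below):

	hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
		  node->addr, node->len);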
Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index 540e267..c30373d 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -157,6 +157,8 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) return -EINVAL; spin_lock_irqsave(&handler->lock, flags); + hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr, + mnode->len); node = __mmu_rb_search(handler, mnode->addr, mnode->len); if (node) { ret = -EINVAL; @@ -181,7 +183,11 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, { struct mmu_rb_node *node; + hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len); node = __mmu_int_rb_iter_first(handler->root, addr, len); + if (node) + hfi1_cdbg(MMU, "Found node addr 0x%llx, len %u", node->addr, + node->len); return node; } @@ -189,6 +195,8 @@ static void __mmu_rb_remove(struct mmu_rb_handler *handler, struct mmu_rb_node *node, bool arg) { /* Validity of handler and node pointers has been checked by caller. */ + hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, + node->len); __mmu_int_rb_remove(node, handler->root); if (handler->ops->remove) handler->ops->remove(handler->root, node, arg); @@ -266,6 +274,8 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, spin_lock_irqsave(&handler->lock, flags); for (node = __mmu_int_rb_iter_first(root, start, end); node; node = __mmu_int_rb_iter_next(node, start, end)) { + hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", + node->addr, node->len); if (handler->ops->invalidate(root, node)) __mmu_rb_remove(handler, node, true); } diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c index 6821d7c..8b62fef 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/staging/rdma/hfi1/trace.c @@ -232,3 +232,4 @@ __hfi1_trace_fn(DC8051); __hfi1_trace_fn(FIRMWARE); __hfi1_trace_fn(RCVCTRL); __hfi1_trace_fn(TID); +__hfi1_trace_fn(MMU); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index 4d91c18..963dc94 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -1340,6 +1340,7 @@ __hfi1_trace_def(DC8051); __hfi1_trace_def(FIRMWARE); __hfi1_trace_def(RCVCTRL); __hfi1_trace_def(TID); +__hfi1_trace_def(MMU); #define hfi1_cdbg(which, fmt, ...) \ __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) -- cgit v0.10.2 From b8718e2e2e4702dfbe0d9a15d527e0531807e871 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:15:04 -0800 Subject: IB/hfi1: Remove compare callback Interval RB trees provide their own searching function, which also takes care of determining the path through the tree that should be taken. This make the compare callback unnecessary. 
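With the helpers generated by INTERVAL_TREE_DEFINE(), overlap is resolved during the tree walk itself, so the core of the search reduces to a single call (sketch, per the earlier interval-tree conversion in this series):

	node = __mmu_int_rb_iter_first(handler->root, addr, len);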
Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index c30373d..5d27fee 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -98,7 +98,7 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) struct mmu_rb_handler *handlr; unsigned long flags; - if (!ops->compare || !ops->invalidate) + if (!ops->invalidate) return -EINVAL; handlr = kmalloc(sizeof(*handlr), GFP_KERNEL); diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h index abed3a6..9c26009 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.h +++ b/drivers/staging/rdma/hfi1/mmu_rb.h @@ -57,8 +57,6 @@ struct mmu_rb_node { }; struct mmu_rb_ops { - int (*compare)(struct mmu_rb_node *, unsigned long, - unsigned long); int (*insert)(struct rb_root *, struct mmu_rb_node *); void (*remove)(struct rb_root *, struct mmu_rb_node *, bool); int (*invalidate)(struct rb_root *, struct mmu_rb_node *); diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 1d971c0..bf670cb 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -86,8 +86,6 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); static int set_rcvarray_entry(struct file *, unsigned long, u32, struct tid_group *, struct page **, unsigned); -static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long, - unsigned long); static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); @@ -98,7 +96,6 @@ static int unprogram_rcvarray(struct file *, u32, struct tid_group **); static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *); static struct mmu_rb_ops tid_rb_ops = { - .compare = mmu_addr_cmp, .insert = mmu_rb_insert, .remove = mmu_rb_remove, .invalidate = mmu_rb_invalidate @@ -1016,17 +1013,6 @@ static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) return 0; } -static int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr, - unsigned long len) -{ - if ((addr + len) <= node->addr) - return -1; - else if (addr >= node->addr && addr < (node->addr + node->len)) - return 0; - else - return 1; -} - static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) { struct hfi1_filedata *fdata = -- cgit v0.10.2 From 0f310a00e02094ea7a2a7d2ae45bd51d97706caa Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:15:10 -0800 Subject: IB/hfi1: Add filter callback This commit adds a filter callback, which can be used to filter out interval RB nodes matching a certain interval down to a single one. This is needed for the upcoming SDMA-side caching where buffers will need to be filtered by their virtual address. 
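For example, the SDMA-side cache added later in this series filters overlapping candidates down to the node whose start address matches the buffer exactly:

	static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
				   unsigned long len)
	{
		/* Match only the node that starts at this virtual address. */
		return (bool)(node->addr == addr);
	}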
Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index 5d27fee..6edd5f0 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -181,13 +181,22 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, unsigned long addr, unsigned long len) { - struct mmu_rb_node *node; + struct mmu_rb_node *node = NULL; hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len); - node = __mmu_int_rb_iter_first(handler->root, addr, len); - if (node) - hfi1_cdbg(MMU, "Found node addr 0x%llx, len %u", node->addr, - node->len); + if (!handler->ops->filter) { + node = __mmu_int_rb_iter_first(handler->root, addr, + (addr + len) - 1); + } else { + for (node = __mmu_int_rb_iter_first(handler->root, addr, + (addr + len) - 1); + node; + node = __mmu_int_rb_iter_next(node, addr, + (addr + len) - 1)) { + if (handler->ops->filter(node, addr, len)) + return node; + } + } return node; } diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h index 9c26009..f8523fd 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.h +++ b/drivers/staging/rdma/hfi1/mmu_rb.h @@ -57,6 +57,7 @@ struct mmu_rb_node { }; struct mmu_rb_ops { + bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long); int (*insert)(struct rb_root *, struct mmu_rb_node *); void (*remove)(struct rb_root *, struct mmu_rb_node *, bool); int (*invalidate)(struct rb_root *, struct mmu_rb_node *); -- cgit v0.10.2 From a489876010377481823ae5dbbd83fa32792a2e16 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:15:16 -0800 Subject: IB/hfi1: Adjust last address values for intervals Last address values for intervals in the interval RB tree nodes should be non-inclusive in order to avoid confusing ranges. Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index 6edd5f0..c7ad016 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -90,7 +90,7 @@ static unsigned long mmu_node_start(struct mmu_rb_node *node) static unsigned long mmu_node_last(struct mmu_rb_node *node) { - return ((node->addr & PAGE_MASK) + node->len); + return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1; } int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) @@ -281,8 +281,8 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, unsigned long flags; spin_lock_irqsave(&handler->lock, flags); - for (node = __mmu_int_rb_iter_first(root, start, end); node; - node = __mmu_int_rb_iter_next(node, start, end)) { + for (node = __mmu_int_rb_iter_first(root, start, end - 1); node; + node = __mmu_int_rb_iter_next(node, start, end - 1)) { hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", node->addr, node->len); if (handler->ops->invalidate(root, node)) -- cgit v0.10.2 From 5cd3a88d7f2b050164dc1df59a398294515126d9 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:15:22 -0800 Subject: IB/hfi1: Implement SDMA-side buffer caching Add support for caching of user buffers used for SDMA transfers. This change improves performance by avoiding repeatedly pinning the pages of buffers, which are being re-used by the application. 
While the cost of the pinning operation has been made heavier by adding the extra code to search the cache tree, re-allocate pages arrays, and future cache evictions, that cost will be amortized against the savings when the same buffer is re-used. It is also worth noting that in most cases, the cost of pinning should be much lower due to the buffer already being in the cache. Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index 14fe079..a53edb9 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -68,6 +68,7 @@ #include "verbs.h" /* for the headers */ #include "common.h" /* for struct hfi1_tid_info */ #include "trace.h" +#include "mmu_rb.h" static uint hfi1_sdma_comp_ring_size = 128; module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO); @@ -145,9 +146,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* Last packet in the request */ #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) -/* Last packet that uses a particular io vector */ -#define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0) - #define SDMA_REQ_IN_USE 0 #define SDMA_REQ_FOR_THREAD 1 #define SDMA_REQ_SEND_DONE 2 @@ -183,6 +181,13 @@ struct user_sdma_iovec { u64 offset; }; +struct sdma_mmu_node { + struct mmu_rb_node rb; + atomic_t refcount; + struct page **pages; + unsigned npages; +}; + struct user_sdma_request { struct sdma_req_info info; struct hfi1_user_sdma_pkt_q *pq; @@ -252,11 +257,6 @@ struct user_sdma_txreq { struct sdma_txreq txreq; struct list_head list; struct user_sdma_request *req; - struct { - struct user_sdma_iovec *vec; - u8 flags; - } iovecs[3]; - int idx; u16 flags; unsigned busycount; u64 seqnum; @@ -277,7 +277,7 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *); static void user_sdma_free_request(struct user_sdma_request *, bool); static int pin_vector_pages(struct user_sdma_request *, struct user_sdma_iovec *); -static void unpin_vector_pages(struct user_sdma_iovec *); +static void unpin_vector_pages(struct page **, unsigned); static int check_header_template(struct user_sdma_request *, struct hfi1_pkt_header *, u32, u32); static int set_txreq_header(struct user_sdma_request *, @@ -296,6 +296,17 @@ static int defer_packet_queue( struct sdma_txreq *, unsigned seq); static void activate_packet_queue(struct iowait *, int); +static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); +static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *); +static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); +static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *); + +static struct mmu_rb_ops sdma_rb_ops = { + .filter = sdma_rb_filter, + .insert = sdma_rb_insert, + .remove = sdma_rb_remove, + .invalidate = sdma_rb_invalidate +}; static int defer_packet_queue( struct sdma_engine *sde, @@ -385,6 +396,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) pq->state = SDMA_PKT_Q_INACTIVE; atomic_set(&pq->n_reqs, 0); init_waitqueue_head(&pq->wait); + pq->sdma_rb_root = RB_ROOT; iowait_init(&pq->busy, 0, NULL, defer_packet_queue, activate_packet_queue, NULL); @@ -415,6 +427,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) cq->nentries = hfi1_sdma_comp_ring_size; fd->cq = cq; + ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, 
&sdma_rb_ops); + if (ret) { + dd_dev_err(dd, "Failed to register with MMU %d", ret); + goto done; + } + spin_lock_irqsave(&uctxt->sdma_qlock, flags); list_add(&pq->list, &uctxt->sdma_queues); spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); @@ -444,6 +462,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, uctxt->ctxt, fd->subctxt); pq = fd->pq; + hfi1_mmu_rb_unregister(&pq->sdma_rb_root); if (pq) { spin_lock_irqsave(&uctxt->sdma_qlock, flags); if (!list_empty(&pq->list)) @@ -477,7 +496,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, struct hfi1_user_sdma_pkt_q *pq = fd->pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; - unsigned long idx = 0, unpinned; + unsigned long idx = 0; u8 pcount = initial_pkt_count; struct sdma_req_info info; struct user_sdma_request *req; @@ -498,14 +517,6 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, return -EFAULT; } - /* Process any completed vectors */ - unpinned = xchg(&pq->unpinned, 0); - if (unpinned) { - down_write(¤t->mm->mmap_sem); - current->mm->pinned_vm -= unpinned; - up_write(¤t->mm->mmap_sem); - } - trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, (u16 *)&info); if (cq->comps[info.comp_idx].status == QUEUED || @@ -609,7 +620,11 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, while (i < req->data_iovs) { INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec)); - req->iovs[i].offset = 0; + ret = pin_vector_pages(req, &req->iovs[i]); + if (ret) { + req->status = ret; + goto free_req; + } req->data_len += req->iovs[i++].iov.iov_len; } SDMA_DBG(req, "total data length %u", req->data_len); @@ -827,9 +842,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) tx->flags = 0; tx->req = req; tx->busycount = 0; - tx->idx = -1; INIT_LIST_HEAD(&tx->list); - memset(tx->iovecs, 0, sizeof(tx->iovecs)); if (req->seqnum == req->info.npkts - 1) tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT; @@ -850,18 +863,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) WARN_ON(iovec->offset); } - /* - * This request might include only a header and no user - * data, so pin pages only if there is data and it the - * pages have not been pinned already. 
- */ - if (unlikely(!iovec->pages && iovec->iov.iov_len)) { - ret = pin_vector_pages(req, iovec); - if (ret) - goto free_tx; - } - - tx->iovecs[++tx->idx].vec = iovec; datalen = compute_data_length(req, tx); if (!datalen) { SDMA_DBG(req, @@ -960,19 +961,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) data_sent += len; if (unlikely(queued < datalen && pageidx == iovec->npages && - req->iov_idx < req->data_iovs - 1 && - tx->idx < ARRAY_SIZE(tx->iovecs))) { + req->iov_idx < req->data_iovs - 1)) { iovec->offset += iov_offset; - tx->iovecs[tx->idx].flags |= - TXREQ_FLAGS_IOVEC_LAST_PKT; iovec = &req->iovs[++req->iov_idx]; - if (!iovec->pages) { - ret = pin_vector_pages(req, iovec); - if (ret) - goto free_txreq; - } iov_offset = 0; - tx->iovecs[++tx->idx].vec = iovec; } } /* @@ -983,18 +975,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (req_opcode(req->info.ctrl) == EXPECTED) req->tidoffset += datalen; req->sent += data_sent; - if (req->data_len) { - tx->iovecs[tx->idx].vec->offset += iov_offset; - /* - * If we've reached the end of the io vector, mark it - * so the callback can unpin the pages and free it. - */ - if (tx->iovecs[tx->idx].vec->offset == - tx->iovecs[tx->idx].vec->iov.iov_len) - tx->iovecs[tx->idx].flags |= - TXREQ_FLAGS_IOVEC_LAST_PKT; - } - + if (req->data_len) + iovec->offset += iov_offset; list_add_tail(&tx->txreq.list, &req->txps); /* * It is important to increment this here as it is used to @@ -1047,38 +1029,78 @@ static inline int num_user_pages(const struct iovec *iov) static int pin_vector_pages(struct user_sdma_request *req, struct user_sdma_iovec *iovec) { - int pinned, npages; + int ret = 0, pinned, npages; + struct page **pages; + struct hfi1_user_sdma_pkt_q *pq = req->pq; + struct sdma_mmu_node *node = NULL; + struct mmu_rb_node *rb_node; + + rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root, + (unsigned long)iovec->iov.iov_base, + iovec->iov.iov_len); + if (rb_node) + node = container_of(rb_node, struct sdma_mmu_node, rb); + + if (!node) { + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; - npages = num_user_pages(&iovec->iov); - iovec->pages = kcalloc(npages, sizeof(*iovec->pages), GFP_KERNEL); - if (!iovec->pages) { - SDMA_DBG(req, "Failed page array alloc"); - return -ENOMEM; + node->rb.addr = (unsigned long)iovec->iov.iov_base; + node->rb.len = iovec->iov.iov_len; + atomic_set(&node->refcount, 0); } - pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base, - npages, 0, iovec->pages); - - if (pinned < 0) - return pinned; + npages = num_user_pages(&iovec->iov); + if (node->npages < npages) { + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + SDMA_DBG(req, "Failed page array alloc"); + ret = -ENOMEM; + goto bail; + } + memcpy(pages, node->pages, node->npages * sizeof(*pages)); + + npages -= node->npages; + pinned = hfi1_acquire_user_pages( + ((unsigned long)iovec->iov.iov_base + + (node->npages * PAGE_SIZE)), npages, 0, + pages + node->npages); + if (pinned < 0) { + kfree(pages); + ret = pinned; + goto bail; + } + if (pinned != npages) { + unpin_vector_pages(pages, pinned); + ret = -EFAULT; + goto bail; + } + kfree(node->pages); + node->pages = pages; + node->npages += pinned; + npages = node->npages; + } + iovec->pages = node->pages; + iovec->npages = npages; - iovec->npages = pinned; - if (pinned != npages) { - SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages); - unpin_vector_pages(iovec); - return -EFAULT; + if 
(!rb_node) { + if (hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb)) + goto bail; + } else { + atomic_inc(&node->refcount); } return 0; +bail: + if (!rb_node) + kfree(node); + return ret; } -static void unpin_vector_pages(struct user_sdma_iovec *iovec) +static void unpin_vector_pages(struct page **pages, unsigned npages) { - hfi1_release_user_pages(iovec->pages, iovec->npages, 0); - - kfree(iovec->pages); - iovec->pages = NULL; - iovec->npages = 0; - iovec->offset = 0; + hfi1_release_user_pages(pages, npages, 0); + kfree(pages); } static int check_header_template(struct user_sdma_request *req, @@ -1360,7 +1382,6 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq; u16 idx; - int i, j; if (!tx->req) return; @@ -1369,24 +1390,6 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) pq = req->pq; cq = req->cq; - /* - * If we have any io vectors associated with this txreq, - * check whether they need to be 'freed'. - */ - for (i = tx->idx; i >= 0; i--) { - if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) { - struct user_sdma_iovec *vec = - tx->iovecs[i].vec; - - for (j = 0; j < vec->npages; j++) - put_page(vec->pages[j]); - xadd(&pq->unpinned, vec->npages); - kfree(vec->pages); - vec->pages = NULL; - vec->npages = 0; - } - } - if (status != SDMA_TXREQ_S_OK) { SDMA_DBG(req, "SDMA completion with error %d", status); @@ -1439,12 +1442,26 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) kmem_cache_free(req->pq->txreq_cache, tx); } } - if (req->data_iovs && unpin) { + if (req->data_iovs) { + struct sdma_mmu_node *node; + struct mmu_rb_node *mnode; int i; - for (i = 0; i < req->data_iovs; i++) - if (req->iovs[i].npages && req->iovs[i].pages) - unpin_vector_pages(&req->iovs[i]); + for (i = 0; i < req->data_iovs; i++) { + mnode = hfi1_mmu_rb_search( + &req->pq->sdma_rb_root, + (unsigned long)req->iovs[i].iov.iov_base, + req->iovs[i].iov.iov_len); + if (!mnode) + continue; + + node = container_of(mnode, struct sdma_mmu_node, rb); + if (unpin) + hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, + &node->rb); + else + atomic_dec(&node->refcount); + } } kfree(req->tids); clear_bit(SDMA_REQ_IN_USE, &req->flags); @@ -1463,3 +1480,39 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt, idx, state, ret); } + +static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, + unsigned long len) +{ + return (bool)(node->addr == addr); +} + +static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + + atomic_inc(&node->refcount); + return 0; +} + +static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, + bool notifier) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + + if (!notifier) + unpin_vector_pages(node->pages, node->npages); + kfree(node); +} + +static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + + if (!atomic_read(&node->refcount)) + return 1; + return 0; +} diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h index e0d0fe0..39866b5 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.h +++ b/drivers/staging/rdma/hfi1/user_sdma.h @@ -67,6 +67,7 @@ struct hfi1_user_sdma_pkt_q { unsigned 
state; wait_queue_head_t wait; unsigned long unpinned; + struct rb_root sdma_rb_root; }; struct hfi1_user_sdma_comp_q { -- cgit v0.10.2 From 2c97ce4f3c292e9ab75c7b6b4d9f69f0a9ee241d Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:15:28 -0800 Subject: IB/hfi1: Add pin query function System administrators can use the locked memory ulimit setting to set the maximum amount of memory a user can lock/pin. However, this setting alone is not enough to guarantee good operation of the hfi1 driver due to the fact that the setting does not have fine enough granularity to account for the limit being used by multiple user processes and caches. Therefore, a better limiting algorithm is needed. This is where the new hfi1_can_pin_pages() function and the cache_size module parameter come in. The function works by looking at the ulimit and cache_size value to compute a cache size. The algorithm examines the ulimit value and, if it is not "unlimited", computes a per-cache limit based on the number of configured user contexts. After that, the lower of the two - cache_size and computed per-cache limit - is used. Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 2107cdc..ff3b37a 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1664,6 +1664,7 @@ void shutdown_led_override(struct hfi1_pportdata *ppd); */ #define DEFAULT_RCVHDR_ENTSIZE 32 +bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32); int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **); void hfi1_release_user_pages(struct page **, size_t, bool); diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c index 3bf8108..bd7a8ab 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/staging/rdma/hfi1/user_pages.c @@ -48,22 +48,62 @@ #include #include #include +#include #include "hfi.h" -int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, - struct page **pages) +static unsigned long cache_size = 256; +module_param(cache_size, ulong, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)"); + +/* + * Determine whether the caller can pin pages. + * + * This function should be used in the implementation of buffer caches. + * The cache implementation should call this function prior to attempting + * to pin buffer pages in order to determine whether they should do so. + * The function computes cache limits based on the configured ulimit and + * cache size. Use of this function is especially important for caches + * which are not limited in any other way (e.g. by HW resources) and, thus, + * could keeping caching buffers. + * + */ +bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages) { - unsigned long pinned, lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit, + size = (cache_size * (1UL << 20)); /* convert to bytes */ + unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt; bool can_lock = capable(CAP_IPC_LOCK); - int ret; + + /* + * Calculate per-cache size. The calculation below uses only a quarter + * of the available per-context limit. This leaves space for other + * pinning. Should we worry about shared ctxts? 
+ */ + cache_limit = (ulimit / usr_ctxts) / 4; + + /* If ulimit isn't set to "unlimited" and is smaller than cache_size. */ + if (ulimit != (-1UL) && size > cache_limit) + size = cache_limit; + + /* Convert to number of pages */ + size = DIV_ROUND_UP(size, PAGE_SIZE); down_read(¤t->mm->mmap_sem); pinned = current->mm->pinned_vm; up_read(¤t->mm->mmap_sem); - if (pinned + npages > lock_limit && !can_lock) - return -ENOMEM; + /* First, check the absolute limit against all pinned pages. */ + if (pinned + npages >= ulimit && !can_lock) + return false; + + return ((nlocked + npages) <= size) || can_lock; +} + +int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, + struct page **pages) +{ + int ret; ret = get_user_pages_fast(vaddr, npages, writable, pages); if (ret < 0) -- cgit v0.10.2 From bd3a8947de916534722b0861d865d3a809c0743c Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:15:33 -0800 Subject: IB/hfi1: Specify mm when releasing pages This change adds a pointer to the process mm_struct when calling hfi1_release_user_pages(). Previously, the function used the mm_struct of the current process to adjust the number of pinned pages. However, is some cases, namely when unpinning pages due to a MMU notifier call, we want to drop into that code block as it will cause a deadlock (the MMU notifiers take the process' mmap_sem prior to calling the callbacks). By allowing to caller to specify the pointer to the mm_struct, the caller has finer control over that part of hfi1_release_user_pages(). Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index ff3b37a..3dc644d 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1666,7 +1666,7 @@ void shutdown_led_override(struct hfi1_pportdata *ppd); bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32); int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **); -void hfi1_release_user_pages(struct page **, size_t, bool); +void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool); static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd) { diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index bf670cb..591605a 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -550,7 +550,7 @@ nomem: * for example), unpin all unmapped pages so we can pin them nex time. 
*/ if (mapped_pages != pinned) - hfi1_release_user_pages(&pages[mapped_pages], + hfi1_release_user_pages(current->mm, &pages[mapped_pages], pinned - mapped_pages, false); bail: @@ -923,7 +923,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, PCI_DMA_FROMDEVICE); - hfi1_release_user_pages(node->pages, node->npages, true); + hfi1_release_user_pages(current->mm, node->pages, node->npages, true); node->grp->used--; node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c index bd7a8ab..88e10b5f 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/staging/rdma/hfi1/user_pages.c @@ -116,7 +116,8 @@ int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, return ret; } -void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty) +void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, + size_t npages, bool dirty) { size_t i; @@ -126,9 +127,9 @@ void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty) put_page(p[i]); } - if (current->mm) { /* during close after signal, mm can be NULL */ - down_write(¤t->mm->mmap_sem); - current->mm->pinned_vm -= npages; - up_write(¤t->mm->mmap_sem); + if (mm) { /* during close after signal, mm can be NULL */ + down_write(&mm->mmap_sem); + mm->pinned_vm -= npages; + up_write(&mm->mmap_sem); } } diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index a53edb9..bf55a41d 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -277,7 +277,7 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *); static void user_sdma_free_request(struct user_sdma_request *, bool); static int pin_vector_pages(struct user_sdma_request *, struct user_sdma_iovec *); -static void unpin_vector_pages(struct page **, unsigned); +static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned); static int check_header_template(struct user_sdma_request *, struct hfi1_pkt_header *, u32, u32); static int set_txreq_header(struct user_sdma_request *, @@ -1072,7 +1072,7 @@ static int pin_vector_pages(struct user_sdma_request *req, goto bail; } if (pinned != npages) { - unpin_vector_pages(pages, pinned); + unpin_vector_pages(current->mm, pages, pinned); ret = -EFAULT; goto bail; } @@ -1097,9 +1097,10 @@ bail: return ret; } -static void unpin_vector_pages(struct page **pages, unsigned npages) +static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, + unsigned npages) { - hfi1_release_user_pages(pages, npages, 0); + hfi1_release_user_pages(mm, pages, npages, 0); kfree(pages); } @@ -1502,8 +1503,14 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); - if (!notifier) - unpin_vector_pages(node->pages, node->npages); + unpin_vector_pages(notifier ? NULL : current->mm, node->pages, + node->npages); + /* + * If called by the MMU notifier, we have to adjust the pinned + * page count ourselves. 
+ */ + if (notifier) + current->mm->pinned_vm -= node->npages; kfree(node); } -- cgit v0.10.2 From a7922f7ddf023c93b0c409d7a3557fdf0b5ce343 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:15:39 -0800 Subject: IB/hfi1: Switch to using the pin query function Use the new function to query whether the expected receive user buffer can be pinned successfully. This requires that a new variable be added to the hfi1_filedata structure used to hold the number of pages pinned by the expected receive code. Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 3dc644d..16cbdc4 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -1190,6 +1190,7 @@ struct hfi1_filedata { struct hfi1_user_sdma_pkt_q *pq; /* for cpu affinity; -1 if none */ int rec_cpu_num; + u32 tid_n_pinned; struct rb_root tid_rb_root; struct tid_rb_node **entry_to_rb; spinlock_t tid_lock; /* protect tid_[limit,used] counters */ diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 591605a..0861e09 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -396,11 +396,14 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) * pages, accept the amount pinned so far and program only that. * User space knows how to deal with partially programmed buffers. */ + if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) + return -ENOMEM; pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); if (pinned <= 0) { ret = pinned; goto bail; } + fd->tid_n_pinned += npages; /* Find sets of physically contiguous pages */ npagesets = find_phys_blocks(pages, pinned, pagesets); @@ -549,10 +552,12 @@ nomem: * If not everything was mapped (due to insufficient RcvArray entries, * for example), unpin all unmapped pages so we can pin them nex time. */ - if (mapped_pages != pinned) + if (mapped_pages != pinned) { hfi1_release_user_pages(current->mm, &pages[mapped_pages], pinned - mapped_pages, false); + fd->tid_n_pinned -= pinned - mapped_pages; + } bail: kfree(pagesets); kfree(pages); @@ -924,6 +929,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, PCI_DMA_FROMDEVICE); hfi1_release_user_pages(current->mm, node->pages, node->npages, true); + fd->tid_n_pinned -= node->npages; node->grp->used--; node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); -- cgit v0.10.2 From 5511d7810752f426f0a9f999100fd249d352c2ef Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 8 Mar 2016 11:15:44 -0800 Subject: IB/hfi1: Add SDMA cache eviction algorithm This commit adds a cache eviction algorithm for the SDMA user buffer cache. Besides the interval RB tree used for node lookup, the cache nodes are also arranged in a doubly-linked list. When a node is used, it is put at the beginning of the list. Less frequently used nodes naturally move to the tail of the list. When the cache limit is reached, the eviction code starts traversing the linked list in reverse, freeing buffers until enough space has been freed to fit the new user buffer. This guarantees that only the least used cache nodes will be removed from the cache. 
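For orientation, a condensed sketch of how the LRU list and the eviction pass interact (illustrative only; locking details such as dropping evict_lock around hfi1_mmu_rb_remove() are elided, and the full hunks follow below):

	/* On every hit the node moves to the head of pq->evict, so the
	 * tail naturally accumulates the least recently used buffers. */
	spin_lock(&pq->evict_lock);
	list_move(&node->list, &pq->evict);
	pq->n_locked += pinned;
	spin_unlock(&pq->evict_lock);

	/* When hfi1_can_pin_pages() refuses a new pin, walk the list from
	 * the tail and drop every node that no request still references,
	 * until enough pages have been freed for the new buffer. */
	list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
		if (!atomic_read(&node->refcount)) {
			cleared += node->npages;
			hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
			if (cleared >= npages)
				break;
		}
	}
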
Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Mitko Haralanov Signed-off-by: Jubin John Signed-off-by: Doug Ledford diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index bf55a41d..46e254d 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -183,6 +183,8 @@ struct user_sdma_iovec { struct sdma_mmu_node { struct mmu_rb_node rb; + struct list_head list; + struct hfi1_user_sdma_pkt_q *pq; atomic_t refcount; struct page **pages; unsigned npages; @@ -397,6 +399,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) atomic_set(&pq->n_reqs, 0); init_waitqueue_head(&pq->wait); pq->sdma_rb_root = RB_ROOT; + INIT_LIST_HEAD(&pq->evict); + spin_lock_init(&pq->evict_lock); iowait_init(&pq->busy, 0, NULL, defer_packet_queue, activate_packet_queue, NULL); @@ -1027,9 +1031,33 @@ static inline int num_user_pages(const struct iovec *iov) return 1 + ((epage - spage) >> PAGE_SHIFT); } +/* Caller must hold pq->evict_lock */ +static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) +{ + u32 cleared = 0; + struct sdma_mmu_node *node, *ptr; + + list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) { + /* Make sure that no one is still using the node. */ + if (!atomic_read(&node->refcount)) { + /* + * Need to use the page count now as the remove callback + * will free the node. + */ + cleared += node->npages; + spin_unlock(&pq->evict_lock); + hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb); + spin_lock(&pq->evict_lock); + if (cleared >= npages) + break; + } + } + return cleared; +} + static int pin_vector_pages(struct user_sdma_request *req, struct user_sdma_iovec *iovec) { - int ret = 0, pinned, npages; + int ret = 0, pinned, npages, cleared; struct page **pages; struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; @@ -1048,7 +1076,9 @@ static int pin_vector_pages(struct user_sdma_request *req, node->rb.addr = (unsigned long)iovec->iov.iov_base; node->rb.len = iovec->iov.iov_len; + node->pq = pq; atomic_set(&node->refcount, 0); + INIT_LIST_HEAD(&node->list); } npages = num_user_pages(&iovec->iov); @@ -1062,6 +1092,14 @@ static int pin_vector_pages(struct user_sdma_request *req, memcpy(pages, node->pages, node->npages * sizeof(*pages)); npages -= node->npages; +retry: + if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) { + spin_lock(&pq->evict_lock); + cleared = sdma_cache_evict(pq, npages); + spin_unlock(&pq->evict_lock); + if (cleared >= npages) + goto retry; + } pinned = hfi1_acquire_user_pages( ((unsigned long)iovec->iov.iov_base + (node->npages * PAGE_SIZE)), npages, 0, @@ -1080,13 +1118,27 @@ static int pin_vector_pages(struct user_sdma_request *req, node->pages = pages; node->npages += pinned; npages = node->npages; + spin_lock(&pq->evict_lock); + if (!rb_node) + list_add(&node->list, &pq->evict); + else + list_move(&node->list, &pq->evict); + pq->n_locked += pinned; + spin_unlock(&pq->evict_lock); } iovec->pages = node->pages; iovec->npages = npages; if (!rb_node) { - if (hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb)) + ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); + if (ret) { + spin_lock(&pq->evict_lock); + list_del(&node->list); + pq->n_locked -= node->npages; + spin_unlock(&pq->evict_lock); + ret = 0; goto bail; + } } else { atomic_inc(&node->refcount); } @@ -1503,6 +1555,11 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, struct sdma_mmu_node *node = 
container_of(mnode, struct sdma_mmu_node, rb); + spin_lock(&node->pq->evict_lock); + list_del(&node->list); + node->pq->n_locked -= node->npages; + spin_unlock(&node->pq->evict_lock); + unpin_vector_pages(notifier ? NULL : current->mm, node->pages, node->npages); /* diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h index 39866b5..b9240e3 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.h +++ b/drivers/staging/rdma/hfi1/user_sdma.h @@ -68,6 +68,9 @@ struct hfi1_user_sdma_pkt_q { wait_queue_head_t wait; unsigned long unpinned; struct rb_root sdma_rb_root; + u32 n_locked; + struct list_head evict; + spinlock_t evict_lock; /* protect evict and n_locked */ }; struct hfi1_user_sdma_comp_q { -- cgit v0.10.2 From 5658600e7f67063e75b8fb8fa6c697183d305dd3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Mar 2016 08:41:59 +0300 Subject: ib_srpt: fix a WARN_ON() message The first argument of WARN_ON() is a condition, so it means the warning message here will just be the name without the ->qp_num information. Signed-off-by: Dan Carpenter Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 25bdaee..578c370 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -839,7 +839,7 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) if (srpt_set_ch_state(ch, CH_DISCONNECTED)) schedule_work(&ch->release_work); else - WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num); + WARN_ONCE(1, "%s-%d\n", ch->sess_name, ch->qp->qp_num); } } -- cgit v0.10.2 From 91d9ed8443b88cc50b81cf5ec900172515270f6f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 Feb 2016 10:25:21 +0200 Subject: net/mlx5_core: Fix caching ATOMIC endian mode capability Add caching of maximum device capability of ATOMIC endian mode. Fixes: f91e6d8941bf ('net/mlx5_core: Add setting ATOMIC endian mode') Signed-off-by: Leon Romanovsky Reviewed-by: Saeed Mahameed Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0916bbc..1d43855 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -423,6 +423,10 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) HCA_CAP_OPMOD_GET_CUR); if (err) return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; } else { return 0; } -- cgit v0.10.2 From b06e7de8a9d8d1d540ec122bbdf2face2a211634 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 Feb 2016 10:25:22 +0200 Subject: net/mlx5_core: Refactor device capability function Device capability function was called similar in all places. It was called twice for every queried parameter, while the difference between calls was in HCA capability mode only. The change proposed unify these calls into one function. 
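To make the effect concrete, a before/after view of a typical call site (sketch only; the real hunks are below):

	/* before: every capability was queried twice, once per HCA cap mode */
	err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, HCA_CAP_OPMOD_GET_CUR);
	if (err)
		return err;
	err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, HCA_CAP_OPMOD_GET_MAX);
	if (err)
		return err;

	/* after: one call fetches both the current and the maximum caps */
	err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
	if (err)
		return err;
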
Signed-off-by: Leon Romanovsky Reviewed-by: Saeed Mahameed Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index aa1ab47..fe6dfd8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -98,88 +98,49 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { int err; - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); if (err) return err; if (MLX5_CAP_GEN(dev, eth_net_offloads)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); if (err) return err; } if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); if (err) return err; } if (MLX5_CAP_GEN(dev, atomic)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); if (err) return err; } if (MLX5_CAP_GEN(dev, roce)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); if (err) return err; } if (MLX5_CAP_GEN(dev, nic_flow_table)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); if (err) return err; } if (MLX5_CAP_GEN(dev, vport_group_manager) && MLX5_CAP_GEN(dev, eswitch_flow_table)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); if (err) return err; } if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1d43855..f2354bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -341,8 +341,9 @@ static u16 to_fw_pkey_sz(u32 size) } } -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, - enum mlx5_cap_mode cap_mode) +static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, + enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) { u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); @@ -392,6 +393,16 @@ query_ex: return err; } +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) +{ + int ret; + + ret = 
mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR); + if (ret) + return ret; + return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX); +} + static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod) { u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)]; @@ -419,12 +430,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) int err; if (MLX5_CAP_GEN(dev, atomic)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); if (err) return err; } else { @@ -466,11 +472,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) if (!set_ctx) goto query_ex; - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); - if (err) - goto query_ex; - - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); if (err) goto query_ex; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9108904..406b27ec 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -706,8 +706,7 @@ void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); int mlx5_cmd_status_to_err_v2(void *ptr); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, - enum mlx5_cap_mode cap_mode); +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, -- cgit v0.10.2 From 3f0393a57509c200aeecc5e3984bf1a47bffc578 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 23 Feb 2016 10:25:23 +0200 Subject: net/mlx5_core: Introduce offload arithmetic hardware capabilities Define the necessary hardware structures for the offload arithmetic capabilities and read/cache them on driver load. 
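A hedged example of how a later consumer might test the newly cached capability (hypothetical caller; this patch only defines the structures and caches them on load):

	if (MLX5_CAP_GEN(mdev, vector_calc) &&
	    MLX5_CAP_VECTOR_CALC(mdev, calc_matrix)) {
		/* the device can offload matrix calculations; the number of
		 * source vectors per operation is bounded by max_vec_count */
		u8 max_vecs = MLX5_CAP_VECTOR_CALC(mdev, max_vec_count);
	}
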
Signed-off-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Reviewed-by: Saeed Mahameed Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index fe6dfd8..75c7ae6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -145,6 +145,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, vector_calc)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC); + if (err) + return err; + } + return 0; } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9566b3b..4b531c4 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1219,6 +1219,8 @@ enum mlx5_cap_type { MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, + MLX5_CAP_RESERVED, + MLX5_CAP_VECTOR_CALC, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1281,6 +1283,10 @@ enum mlx5_cap_type { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) +#define MLX5_CAP_VECTOR_CALC(mdev, cap) \ + MLX5_GET(vector_calc_cap, \ + mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a3cacab..ad0a44b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -616,6 +616,33 @@ struct mlx5_ifc_odp_cap_bits { u8 reserved_at_e0[0x720]; }; +struct mlx5_ifc_calc_op { + u8 reserved_at_0[0x10]; + u8 reserved_at_10[0x9]; + u8 op_swap_endianness[0x1]; + u8 op_min[0x1]; + u8 op_xor[0x1]; + u8 op_or[0x1]; + u8 op_and[0x1]; + u8 op_max[0x1]; + u8 op_add[0x1]; +}; + +struct mlx5_ifc_vector_calc_cap_bits { + u8 calc_matrix[0x1]; + u8 reserved_at_1[0x1f]; + u8 reserved_at_20[0x8]; + u8 max_vec_count[0x8]; + u8 reserved_at_30[0xd]; + u8 max_chunk_size[0x3]; + struct mlx5_ifc_calc_op calc0; + struct mlx5_ifc_calc_op calc1; + struct mlx5_ifc_calc_op calc2; + struct mlx5_ifc_calc_op calc3; + + u8 reserved_at_e0[0x720]; +}; + enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, @@ -770,7 +797,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cd[0x1]; u8 reserved_at_22c[0x1]; u8 apm[0x1]; - u8 reserved_at_22e[0x2]; + u8 vector_calc[0x1]; + u8 reserved_at_22f[0x1]; u8 imaicl[0x1]; u8 reserved_at_231[0x4]; u8 qkv[0x1]; @@ -1940,6 +1968,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; + struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; u8 reserved_at_0[0x8000]; }; -- cgit v0.10.2 From 2953f42513225e2cc90a7a7e3d2d5ce9a7abc799 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 Feb 2016 10:25:24 +0200 Subject: IB/core: Replace setting the zero values in ib_uverbs_ex_query_device The setting to zero during variable initialization eliminates the need to explicitly set to zero variables and structures. 
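In plain C terms, the pattern relied upon here is that an (empty) initializer zeroes the entire aggregate, so the explicit clears become redundant (sketch of the declarations as they appear in the hunk below):

	struct ib_device_attr attr = {0};                      /* all fields zero */
	struct ib_uverbs_ex_query_device_resp resp = { {0} };  /* nested aggregate */

	/* no further memset(&attr, 0, sizeof(attr)) or resp.comp_mask = 0
	 * is needed before the fields are filled in */
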
Signed-off-by: Leon Romanovsky Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3638c78..bb1b93c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3600,9 +3600,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, struct ib_udata *ucore, struct ib_udata *uhw) { - struct ib_uverbs_ex_query_device_resp resp; + struct ib_uverbs_ex_query_device_resp resp = { {0} }; struct ib_uverbs_ex_query_device cmd; - struct ib_device_attr attr; + struct ib_device_attr attr = {0}; int err; if (ucore->inlen < sizeof(cmd)) @@ -3623,14 +3623,11 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (ucore->outlen < resp.response_length) return -ENOSPC; - memset(&attr, 0, sizeof(attr)); - err = ib_dev->query_device(ib_dev, &attr, uhw); if (err) return err; copy_query_dev_fields(file, ib_dev, &resp.base, &attr); - resp.comp_mask = 0; if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) goto end; @@ -3643,9 +3640,6 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, attr.odp_caps.per_transport_caps.uc_odp_caps; resp.odp_caps.per_transport_caps.ud_odp_caps = attr.odp_caps.per_transport_caps.ud_odp_caps; - resp.odp_caps.reserved = 0; -#else - memset(&resp.odp_caps, 0, sizeof(resp.odp_caps)); #endif resp.response_length += sizeof(resp.odp_caps); @@ -3663,8 +3657,5 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); - if (err) - return err; - - return 0; + return err; } -- cgit v0.10.2 From fb532d6a79b96a4c8f678024d7ed3549ff0ca916 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 Feb 2016 10:25:25 +0200 Subject: IB/{core, ulp} Support above 32 possible device capability flags The old bitwise device_cap_flags variable was limited to u32 which has all bits already defined. In order to overcome it, we converted device_cap_flags variable to be u64 type. 
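One point worth spelling out: the legacy uverbs ABI keeps a 32-bit field, so only the lower half of the widened mask can be reported through it, which is why the copy routine now masks it (sketch, both lines taken from the existing code and the hunk below):

	/* flags past bit 31 no longer fit in an int-sized mask ... */
	IB_DEVICE_SG_GAPS_REG = (1ULL << 32),

	/* ... so the legacy uverbs response reports only the lower 32 bits */
	resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
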
Signed-off-by: Leon Romanovsky Reviewed-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index bb1b93c..6fdc7ec 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -402,7 +402,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, resp->hw_ver = attr->hw_ver; resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; - resp->device_cap_flags = attr->device_cap_flags; + resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); resp->max_sge = attr->max_sge; resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 85be0de..caec8e9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -388,7 +388,7 @@ struct ipoib_dev_priv { struct dentry *mcg_dentry; struct dentry *path_dentry; #endif - int hca_caps; + u64 hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; unsigned max_send_sge; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3a03c1d..c9b6234 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -274,7 +274,7 @@ struct ib_device_attr { u32 hw_ver; int max_qp; int max_qp_wr; - int device_cap_flags; + u64 device_cap_flags; int max_sge; int max_sge_rd; int max_cq; -- cgit v0.10.2 From cc8e27cc97318471b7e707932d5b93b0d5f70830 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:34 +0200 Subject: net/core: Add support for configuring VF GUIDs Add two new NLAs to support configuration of Infiniband node or port GUIDs. New applications can choose to use this interface to configure GUIDs with iproute2 with commands such as: ip link set dev ib0 vf 0 node_guid 00:02:c9:03:00:21:6e:70 ip link set dev ib0 vf 0 port_guid 00:02:c9:03:00:21:6e:78 A new ndo, ndo_sef_vf_guid is introduced to notify the net device of the request to change the GUID. 
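A hedged sketch of what a driver-side hook for the new ndo might look like (foo_set_vf_guid is hypothetical; the real implementations appear later in this series):

	static int foo_set_vf_guid(struct net_device *dev, int vf, u64 guid,
				   int guid_type)
	{
		/* guid_type is IFLA_VF_IB_NODE_GUID or IFLA_VF_IB_PORT_GUID,
		 * as passed through by handle_vf_guid() below */
		if (guid_type != IFLA_VF_IB_NODE_GUID &&
		    guid_type != IFLA_VF_IB_PORT_GUID)
			return -EINVAL;

		/* program the GUID through the device-specific interface here */
		return 0;
	}
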
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5440b7b..7b4ae21 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1147,6 +1147,9 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_set_vf_guid)(struct net_device *dev, + int vf, u64 guid, + int guid_type); int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a30b780..1d01e8a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -556,6 +556,8 @@ enum { */ IFLA_VF_STATS, /* network device statistics */ IFLA_VF_TRUST, /* Trust VF */ + IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ + IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ __IFLA_VF_MAX, }; @@ -588,6 +590,11 @@ struct ifla_vf_spoofchk { __u32 setting; }; +struct ifla_vf_guid { + __u32 vf; + __u64 guid; +}; + enum { IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ IFLA_VF_LINK_STATE_ENABLE, /* link always up */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d735e85..4b6f3db 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1387,6 +1387,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, [IFLA_VF_STATS] = { .type = NLA_NESTED }, [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, + [IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) }, + [IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) }, }; static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { @@ -1534,6 +1536,22 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } +static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt, + int guid_type) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type); +} + +static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type) +{ + if (dev->type != ARPHRD_INFINIBAND) + return -EOPNOTSUPP; + + return handle_infiniband_guid(dev, ivt, guid_type); +} + static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1636,6 +1654,24 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } + if (tb[IFLA_VF_IB_NODE_GUID]) { + struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); + + if (!ops->ndo_set_vf_guid) + return -EOPNOTSUPP; + + return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); + } + + if (tb[IFLA_VF_IB_PORT_GUID]) { + struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); + + if (!ops->ndo_set_vf_guid) + return -EOPNOTSUPP; + + return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID); + } + return err; } -- cgit v0.10.2 From d603c809ef91fa2d211bde5e95be417847410379 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:35 +0200 Subject: IB/mlx5: Fix decision on using MAD_IFC Fix the condition that dictates when MAD_IFC should be used. According to firmware specifications, MAD_IFC commands must be used only if the ib_virt capability is off. 
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5afbb69..73cb633 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -283,7 +283,7 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { - return !dev->mdev->issi; + return !MLX5_CAP_GEN(dev->mdev, ib_virt); } enum { -- cgit v0.10.2 From fad61ad4e755f5dd13c7702a87cd907207392534 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:36 +0200 Subject: IB/core: Add subnet prefix to port info The subnet prefix is a part of the port_info MAD returned and should be available at the ib_port_attr struct. We define it here and provide a default implementation in case the hardware driver does not provide one. The subnet prefix is required when creating the address vector to access the SA in networks where GRH must be used. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 270c7ff..1097984 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -650,10 +650,23 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { + union ib_gid gid; + int err; + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; - return device->query_port(device, port_num, port_attr); + memset(port_attr, 0, sizeof(*port_attr)); + err = device->query_port(device, port_num, port_attr); + if (err || port_attr->subnet_prefix) + return err; + + err = ib_query_gid(device, port_num, 0, &gid, NULL); + if (err) + return err; + + port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix); + return 0; } EXPORT_SYMBOL(ib_query_port); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c9b6234..7239b9a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -490,6 +490,7 @@ union rdma_protocol_stats { | RDMA_CORE_CAP_OPA_MAD) struct ib_port_attr { + u64 subnet_prefix; enum ib_port_state state; enum ib_mtu max_mtu; enum ib_mtu active_mtu; -- cgit v0.10.2 From a0c1b2a3508714281f604db818fa0cc83c2f9b6a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:37 +0200 Subject: IB/core: Support accessing SA in virtualized environment Per the ongoing standardisation process, when virtual HCAs are present in a network, traffic is routed based on a destination GID. In order to access the SA we use the well known SA GID. We also add a GRH required boolean field to the port attributes which is used to report to the verbs consumer whether this port is connected to a virtual network. We use this field to realize whether we need to create an address vector with GRH to access the subnet administrator. We clear the port attributes struct before calling the hardware driver to make sure the default remains that GRH is not required. 
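A condensed sketch of the consumer-side effect (this mirrors the update_sm_ah() hunk below): when a port reports grh_required, the address handle used to reach the SA carries a GRH built from the port's subnet prefix and the well known SA GUID:

	if (port_attr.grh_required) {
		ah_attr.ah_flags = IB_AH_GRH;
		ah_attr.grh.dgid.global.subnet_prefix =
			cpu_to_be64(port_attr.subnet_prefix);
		ah_attr.grh.dgid.global.interface_id =
			cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
	}
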
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8e3bf6c..d2214a5 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -885,6 +885,11 @@ static void update_sm_ah(struct work_struct *work) ah_attr.dlid = port_attr.sm_lid; ah_attr.sl = port_attr.sm_sl; ah_attr.port_num = port->port_num; + if (port_attr.grh_required) { + ah_attr.ah_flags = IB_AH_GRH; + ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix); + ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID); + } new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); if (IS_ERR(new_ah->ah)) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7239b9a..3a5a66b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -97,6 +97,11 @@ enum rdma_node_type { RDMA_NODE_USNIC_UDP, }; +enum { + /* set the local administered indication */ + IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2, +}; + enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, @@ -510,6 +515,7 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; + bool grh_required; }; enum ib_device_modify_flags { -- cgit v0.10.2 From 50174a7f2c24d13cdeec435ee1ba70b1e0b1318f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:38 +0200 Subject: IB/core: Add interfaces to control VF attributes Following the practice exercised for network devices which allow the PF net device to configure attributes of its virtual functions, we introduce the following functions to be used by IPoIB which is the network driver implementation for IB devices. ib_set_vf_link_state - set the policy for a VF link. More below. ib_get_vf_config - read configuration information of a VF ib_get_vf_stats - read VF statistics ib_set_vf_guid - set the node or port GUID of a VF Also add an indication in the device cap flags that indicates that this IB devices is based on a virtual function. A VF shares the physical port with the PF and other VFs. When setting the link state we have three options: 1. Auto - in this mode, the virtual port follows the state of the physical port and becomes active only if the physical port's state is active. In all other cases it remains in a Down state. 2. Down - sets the state of the virtual port to Down 3. Up - causes the virtual port to transition into Initialize state if it was not already in this state. A virtualization aware subnet manager can then bring the state of the port into the Active state. 
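A hedged usage sketch of the new helpers from a kernel consumer's point of view (hypothetical caller; a driver that does not implement a callback makes the helper return -ENOSYS):

	struct ifla_vf_info info;
	int err;

	/* let VF 0 on port 1 follow the state of the physical port */
	err = ib_set_vf_link_state(device, 0, 1, IFLA_VF_LINK_STATE_AUTO);

	/* read back the VF's current configuration, including its link policy */
	if (!err)
		err = ib_get_vf_config(device, 0, 1, &info);
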
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5cd1e39..15b8adb 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1551,6 +1551,46 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, } EXPORT_SYMBOL(ib_check_mr_status); +int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, + int state) +{ + if (!device->set_vf_link_state) + return -ENOSYS; + + return device->set_vf_link_state(device, vf, port, state); +} +EXPORT_SYMBOL(ib_set_vf_link_state); + +int ib_get_vf_config(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *info) +{ + if (!device->get_vf_config) + return -ENOSYS; + + return device->get_vf_config(device, vf, port, info); +} +EXPORT_SYMBOL(ib_get_vf_config); + +int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats) +{ + if (!device->get_vf_stats) + return -ENOSYS; + + return device->get_vf_stats(device, vf, port, stats); +} +EXPORT_SYMBOL(ib_get_vf_stats); + +int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, + int type) +{ + if (!device->set_vf_guid) + return -ENOSYS; + + return device->set_vf_guid(device, vf, port, guid, type); +} +EXPORT_SYMBOL(ib_set_vf_guid); + /** * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list * and set it the memory region. diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3a5a66b..8a245a7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -56,6 +56,7 @@ #include #include +#include #include #include #include @@ -218,6 +219,7 @@ enum ib_device_cap_flags { IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), + IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), }; enum ib_signature_prot_cap { @@ -1867,6 +1869,14 @@ struct ib_device { void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); void (*drain_rq)(struct ib_qp *qp); void (*drain_sq)(struct ib_qp *qp); + int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, + int state); + int (*get_vf_config)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *ivf); + int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats); + int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, + int type); struct ib_dma_mapping_ops *dma_ops; @@ -2310,6 +2320,15 @@ int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *attr); +int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, + int state); +int ib_get_vf_config(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *info); +int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats); +int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, + int type); + int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); -- cgit v0.10.2 From 9c3c5f8e1f3092f43a46b247a95ae526c1b4542e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:39 +0200 Subject: IB/ipoib: Add ndo operations for configuring VFs Add ndo operations to the network driver that enables configuring the following operations: ipoib_set_vf_link_state - configure the VF link policy ipoib_get_vf_config - get link state configuration ipoib_set_vf_guid - set a VF port or node GUID ipoib_get_vf_stats - get statistics of a VF 
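With these hooks wired into the PF's net_device_ops, the existing iproute2 VF interface drives them; depending on the iproute2 version, commands along the lines of

ip link set dev ib0 vf 0 state auto
ip link show dev ib0

exercise ipoib_set_vf_link_state and report the per-VF configuration through ipoib_get_vf_config (the device name ib0 is, as in the earlier examples, illustrative only).
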
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 25509bb..80807d6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -51,6 +51,7 @@ #include #include #include +#include #define DRV_VERSION "1.0.0" @@ -1590,11 +1591,67 @@ void ipoib_dev_cleanup(struct net_device *dev) priv->tx_ring = NULL; } +static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state); +} + +static int ipoib_get_vf_config(struct net_device *dev, int vf, + struct ifla_vf_info *ivf) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int err; + + err = ib_get_vf_config(priv->ca, vf, priv->port, ivf); + if (err) + return err; + + ivf->vf = vf; + + return 0; +} + +static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID) + return -EINVAL; + + return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type); +} + +static int ipoib_get_vf_stats(struct net_device *dev, int vf, + struct ifla_vf_stats *vf_stats) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats); +} + static const struct header_ops ipoib_header_ops = { .create = ipoib_hard_header, }; -static const struct net_device_ops ipoib_netdev_ops = { +static const struct net_device_ops ipoib_netdev_ops_pf = { + .ndo_uninit = ipoib_uninit, + .ndo_open = ipoib_open, + .ndo_stop = ipoib_stop, + .ndo_change_mtu = ipoib_change_mtu, + .ndo_fix_features = ipoib_fix_features, + .ndo_start_xmit = ipoib_start_xmit, + .ndo_tx_timeout = ipoib_timeout, + .ndo_set_rx_mode = ipoib_set_mcast_list, + .ndo_get_iflink = ipoib_get_iflink, + .ndo_set_vf_link_state = ipoib_set_vf_link_state, + .ndo_get_vf_config = ipoib_get_vf_config, + .ndo_get_vf_stats = ipoib_get_vf_stats, + .ndo_set_vf_guid = ipoib_set_vf_guid, +}; + +static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_uninit = ipoib_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, @@ -1610,7 +1667,11 @@ void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - dev->netdev_ops = &ipoib_netdev_ops; + if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION) + dev->netdev_ops = &ipoib_netdev_ops_vf; + else + dev->netdev_ops = &ipoib_netdev_ops_pf; + dev->header_ops = &ipoib_header_ops; ipoib_set_ethtool_ops(dev); -- cgit v0.10.2 From 2a4826fe746e01712ca53902ee75c1a1f6c0a4aa Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:40 +0200 Subject: net/mlx5_core: Add VF param when querying vport counter Add a vf parameter to mlx5_core_query_vport_counter so we can call it to query counters of virtual functions. Also update current users of the API. PFs may call mlx5_core_query_vport_counter with other_vport set to indicate that they are querying a virtual function. The virtual function to be queried is given by the vf parameter. Virtual function numbering is zero based so the first VF is 0 and so on. When a PF queries its own function, the other_vport parameter is cleared. 
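The two calling conventions after this change, spelled out (sketch; out and sz stand in for the caller's output buffer as in the existing mad.c caller):

	/* PF queries its own counters: other_vport = 0, the vf argument is
	 * ignored */
	err = mlx5_core_query_vport_counter(mdev, 0, 0, port_num, out, sz);

	/* PF queries VF 0: other_vport = 1 and vf is zero based; the helper
	 * translates it to vport_number = vf + 1 internally */
	err = mlx5_core_query_vport_counter(mdev, 1, 0, port_num, out, sz);
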
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 41d8a00..1534af1 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -208,7 +208,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, if (!out_cnt) return IB_MAD_RESULT_FAILURE; - err = mlx5_core_query_vport_counter(dev->mdev, 0, + err = mlx5_core_query_vport_counter(dev->mdev, 0, 0, port_num, out_cnt, sz); if (!err) pma_cnt_ext_assign(pma_cnt_ext, out_cnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 90ab09e..2b836d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -852,7 +852,8 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, - u8 port_num, void *out, size_t out_sz) + int vf, u8 port_num, void *out, + size_t out_sz) { int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); int is_group_manager; @@ -871,7 +872,7 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, if (other_vport) { if (is_group_manager) { MLX5_SET(query_vport_counter_in, in, other_vport, 1); - MLX5_SET(query_vport_counter_in, in, vport_number, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1); } else { err = -EPERM; goto free; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index a9f2bcc..aafb3e4 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -93,6 +93,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, - u8 port_num, void *out, size_t out_sz); + int vf, u8 port_num, void *out, + size_t out_sz); #endif /* __MLX5_VPORT_H__ */ -- cgit v0.10.2 From 1f324bff9ba3db276f074169d5b4af9e9c117ba1 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:41 +0200 Subject: net/mlx5_core: Implement modify HCA vport command Implement the modify HCA vport commands used to modify the parameters of virtual HCA's ports. 
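A hedged usage sketch, using the field selectors introduced later in this series, of how a caller changes just one attribute of a VF's vHCA port (this is essentially what the mlx5_ib patch further below does):

	struct mlx5_hca_vport_context *req;
	int err;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	/* modify only the state policy of VF 0 (vport number vf + 1 = 1)
	 * on port 1 */
	req->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
	req->policy = MLX5_POLICY_UP;
	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, 0 + 1, req);
	kfree(req);
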
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 037fc4c..ebb4036 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -407,6 +407,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, const char *mlx5_command_str(int command) { switch (command) { + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + return "QUERY_HCA_VPORT_CONTEXT"; + + case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: + return "MODIFY_HCA_VPORT_CONTEXT"; + case MLX5_CMD_OP_QUERY_HCA_CAP: return "QUERY_HCA_CAP"; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 2b836d0..bd51840 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -891,3 +891,70 @@ free: return err; } EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); + +int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + int vf, + struct mlx5_hca_vport_context *req) +{ + int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in); + u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)]; + int is_group_manager; + void *in; + int err; + void *ctx; + + mlx5_core_dbg(dev, "vf %d\n", vf); + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + memset(out, 0, sizeof(out)); + MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) > 1) + MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num); + + ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context); + MLX5_SET(hca_vport_context, ctx, field_select, req->field_select); + MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware); + MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi); + MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw); + MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy); + MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state); + MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state); + MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid); + MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid); + MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1); + MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm); + MLX5_SET(hca_vport_context, ctx, cap_mask2, req->cap_mask2); + MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm); + MLX5_SET(hca_vport_context, ctx, lid, req->lid); + MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply); + MLX5_SET(hca_vport_context, ctx, lmc, req->lmc); + MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout); + MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid); + MLX5_SET(hca_vport_context, ctx, sm_sl, req->sm_sl); + MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter); + MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter); + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + if (err) + 
goto ex; + + err = mlx5_cmd_status_to_err_v2(out); + +ex: + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index aafb3e4..bd93e63 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -95,5 +95,9 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, int vf, u8 port_num, void *out, size_t out_sz); +int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + int vf, + struct mlx5_hca_vport_context *req); #endif /* __MLX5_VPORT_H__ */ -- cgit v0.10.2 From eff901d30e6cebd940072637f112ce4d0090ac12 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:42 +0200 Subject: IB/mlx5: Implement callbacks for manipulating VFs Implement the IB defined callbacks used to manipulate the policy for the link state, set GUIDs or get statistics information. This functionality is added into a new file that will be used to add any SRIOV related functionality to the mlx5 IB layer. The following callbacks have been added: mlx5_ib_get_vf_config mlx5_ib_set_vf_link_state mlx5_ib_get_vf_stats mlx5_ib_set_vf_guid In addition, publish whether this device is based on a virtual function. In mlx5 supported devices, virtual functions are implemented as vHCAs. vHCAs have their own QP number space so it is possible that two vHCAs will use a QP with the same number at the same time. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 4e85188..7493a83 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c new file mode 100644 index 0000000..c1b9de8 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_ib.h" + +static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy) +{ + switch (mlx_policy) { + case MLX5_POLICY_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_POLICY_UP: + return IFLA_VF_LINK_STATE_ENABLE; + case MLX5_POLICY_FOLLOW: + return IFLA_VF_LINK_STATE_AUTO; + default: + return __IFLA_VF_LINK_STATE_MAX; + } +} + +int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *info) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(mdev, 1, 1, vf + 1, rep); + if (err) { + mlx5_ib_warn(dev, "failed to query port policy for vf %d (%d)\n", + vf, err); + goto free; + } + memset(info, 0, sizeof(*info)); + info->linkstate = mlx_to_net_policy(rep->policy); + if (info->linkstate == __IFLA_VF_LINK_STATE_MAX) + err = -EINVAL; + +free: + kfree(rep); + return err; +} + +static inline enum port_state_policy net_to_mlx_policy(int policy) +{ + switch (policy) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_POLICY_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_POLICY_UP; + case IFLA_VF_LINK_STATE_AUTO: + return MLX5_POLICY_FOLLOW; + default: + return MLX5_POLICY_INVALID; + } +} + +int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, + u8 port, int state) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->policy = net_to_mlx_policy(state); + if (in->policy == MLX5_POLICY_INVALID) { + err = -EINVAL; + goto out; + } + in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY; + err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + +out: + kfree(in); + return err; +} + +int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, + u8 port, struct ifla_vf_stats *stats) +{ + int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); + struct mlx5_core_dev *mdev; + struct mlx5_ib_dev *dev; + void *out; + int err; + + dev = to_mdev(device); + mdev = dev->mdev; + + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz); + if (err) + goto ex; + + stats->rx_packets = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.packets); + stats->tx_packets = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.packets); + stats->rx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.octets); + stats->tx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.octets); + stats->multicast = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_multicast.packets); + +ex: + kfree(out); + return err; +} + +static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->field_select = 
MLX5_HCA_VPORT_SEL_NODE_GUID; + in->node_guid = guid; + err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + kfree(in); + return err; +} + +static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; + in->port_guid = guid; + err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + kfree(in); + return err; +} + +int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, + u64 guid, int type) +{ + if (type == IFLA_VF_IB_NODE_GUID) + return set_vf_node_guid(device, vf, port, guid); + else if (type == IFLA_VF_IB_PORT_GUID) + return set_vf_port_guid(device, vf, port, guid); + + return -EINVAL; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 73cb633..e305990 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -562,6 +562,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, cd)) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; + if (!mlx5_core_is_pf(mdev)) + props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + return 0; } @@ -699,6 +702,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->qkey_viol_cntr = rep->qkey_violation_counter; props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; + props->grh_required = rep->grh_required; err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) @@ -2349,6 +2353,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; + if (mlx5_core_is_pf(mdev)) { + dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; + dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; + dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; + dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; + } mlx5_ib_internal_fill_odp_caps(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 76b2b42..f16c818 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -776,6 +776,14 @@ void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp); void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp); void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end); +int mlx5_ib_get_vf_config(struct ib_device *device, int vf, + u8 port, struct ifla_vf_info *info); +int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, + u8 port, int state); +int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, + u8 port, struct ifla_vf_stats *stats); +int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, + u64 guid, int type); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 406b27ec..e1d987f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -613,7 +613,10 @@ struct mlx5_pas { }; enum port_state_policy { - MLX5_AAA_000 + MLX5_POLICY_DOWN = 0, + MLX5_POLICY_UP = 1, + MLX5_POLICY_FOLLOW = 2, + MLX5_POLICY_INVALID = 0xffffffff }; enum 
phy_port_state { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ad0a44b..bb9e07c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3696,6 +3696,12 @@ struct mlx5_ifc_query_hca_vport_pkey_in_bits { u8 pkey_index[0x10]; }; +enum { + MLX5_HCA_VPORT_SEL_PORT_GUID = 1 << 0, + MLX5_HCA_VPORT_SEL_NODE_GUID = 1 << 1, + MLX5_HCA_VPORT_SEL_STATE_POLICY = 1 << 2, +}; + struct mlx5_ifc_query_hca_vport_gid_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v0.10.2 From 68996a6e760e5c74654723eeb57bf65628ae87f4 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:43 +0200 Subject: IB/ipoib: Allow mcast packets from other VFs With SRIOV enabled, two VFs on the same HCA which have the same port LID and may have the same QP number. To enable receiving multicasts from such VFs, further qualify the check: ignore the receive only if, in addition, the packet source gid equals the receiving VF's source gid. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 899e6b7..f0e55e4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -180,6 +180,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) struct sk_buff *skb; u64 mapping[IPOIB_UD_RX_SG]; union ib_gid *dgid; + union ib_gid *sgid; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -203,13 +204,6 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) return; } - /* - * Drop packets that this interface sent, ie multicast packets - * that the HCA has replicated. - */ - if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) - goto repost; - memcpy(mapping, priv->rx_ring[wr_id].mapping, IPOIB_UD_RX_SG * sizeof *mapping); @@ -239,6 +233,25 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) else skb->pkt_type = PACKET_MULTICAST; + sgid = &((struct ib_grh *)skb->data)->sgid; + + /* + * Drop packets that this interface sent, ie multicast packets + * that the HCA has replicated. + */ + if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) { + int need_repost = 1; + + if ((wc->wc_flags & IB_WC_GRH) && + sgid->global.interface_id != priv->local_gid.global.interface_id) + need_repost = 0; + + if (need_repost) { + dev_kfree_skb_any(skb); + goto repost; + } + } + skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; -- cgit v0.10.2
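The VF link-state callbacks above reduce to a two-way translation between the netlink IFLA_VF_LINK_STATE_* values and the vHCA's port state policy, plus a field_select bit that tells firmware which field of the vport context to modify. The sketch below isolates that translation so it can be read and compiled outside the kernel. It is a minimal sketch only: the two enums and every name in it are simplified stand-ins for IFLA_VF_LINK_STATE_* and enum port_state_policy, not the kernel definitions, and the round-trip in main() is purely illustrative.

/* Sketch of the VF link-state translation performed by the new ib_virt.c
 * callbacks.  The enums below are simplified stand-ins for the kernel's
 * IFLA_VF_LINK_STATE_* (include/uapi/linux/if_link.h) and
 * enum port_state_policy (include/linux/mlx5/driver.h).
 * Build with: cc -std=c99 -o policy policy.c
 */
#include <stdio.h>

enum vf_link_state {            /* stand-in for IFLA_VF_LINK_STATE_* */
	VF_LINK_STATE_AUTO,     /* follow the physical port */
	VF_LINK_STATE_ENABLE,   /* force link up */
	VF_LINK_STATE_DISABLE,  /* force link down */
	VF_LINK_STATE_MAX,
};

enum port_policy {              /* stand-in for enum port_state_policy */
	POLICY_DOWN,
	POLICY_UP,
	POLICY_FOLLOW,
	POLICY_INVALID = -1,
};

/* What the set-link-state path feeds into the vport context. */
static enum port_policy net_to_policy(enum vf_link_state state)
{
	switch (state) {
	case VF_LINK_STATE_DISABLE: return POLICY_DOWN;
	case VF_LINK_STATE_ENABLE:  return POLICY_UP;
	case VF_LINK_STATE_AUTO:    return POLICY_FOLLOW;
	default:                    return POLICY_INVALID;
	}
}

/* What the get-config path reports back in ifla_vf_info.linkstate. */
static enum vf_link_state policy_to_net(enum port_policy policy)
{
	switch (policy) {
	case POLICY_DOWN:   return VF_LINK_STATE_DISABLE;
	case POLICY_UP:     return VF_LINK_STATE_ENABLE;
	case POLICY_FOLLOW: return VF_LINK_STATE_AUTO;
	default:            return VF_LINK_STATE_MAX; /* caller turns this into -EINVAL */
	}
}

int main(void)
{
	/* Round-trip every netlink value through the device policy and back. */
	for (int s = VF_LINK_STATE_AUTO; s < VF_LINK_STATE_MAX; s++) {
		enum port_policy p = net_to_policy(s);
		printf("netlink %d -> policy %d -> netlink %d\n",
		       s, p, policy_to_net(p));
	}
	return 0;
}

The asymmetry in the error paths mirrors the patch: an unrecognized device policy maps to the out-of-range __IFLA_VF_LINK_STATE_MAX value so the query callback can fail with -EINVAL, while an unrecognized netlink value maps to MLX5_POLICY_INVALID so the set callback rejects it before touching the device.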
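The IPoIB change at the end of the series is easier to follow as a single predicate than as a diff: a receive completion is treated as the interface's own multicast echo, and dropped, only when its SLID and source QP number match the local ones and the GRH source GID does not prove the packet came from a different vHCA that happens to share that QP number. The sketch below expresses that decision with simplified stand-in types; none of the struct or field names are the kernel's struct ib_wc, union ib_gid or the ipoib private data, and the main() is only an illustration of the two interesting cases.

/* Sketch of the post-patch "is this our own multicast echo?" test made in
 * ipoib_ib_handle_rx_wc().  Types are simplified stand-ins, not kernel types.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WC_FLAG_GRH 0x1             /* stand-in for IB_WC_GRH */

struct rx_wc {                      /* stand-in for the relevant ib_wc fields */
	uint16_t slid;
	uint32_t src_qp;
	unsigned int wc_flags;
	uint64_t sgid_interface_id; /* taken from the GRH, if one is present */
};

struct port_priv {                  /* stand-in for the ipoib private data */
	uint16_t local_lid;
	uint32_t qp_num;
	uint64_t local_gid_interface_id;
};

/* Drop (repost) only if the packet looks like our own transmit replicated
 * back by the HCA: same LID and QP number, and the GRH source GID does not
 * show it came from another vHCA sharing that QP number.
 */
static bool drop_own_echo(const struct rx_wc *wc, const struct port_priv *priv)
{
	if (wc->slid != priv->local_lid || wc->src_qp != priv->qp_num)
		return false;           /* clearly not ours */

	if ((wc->wc_flags & WC_FLAG_GRH) &&
	    wc->sgid_interface_id != priv->local_gid_interface_id)
		return false;           /* same LID/QPN, but another VF's GID */

	return true;                    /* our own replicated multicast */
}

int main(void)
{
	struct port_priv priv = { .local_lid = 5, .qp_num = 0x123,
				  .local_gid_interface_id = 0xaa };
	struct rx_wc own   = { 5, 0x123, WC_FLAG_GRH, 0xaa };
	struct rx_wc other = { 5, 0x123, WC_FLAG_GRH, 0xbb }; /* other VF, same QPN */

	printf("own echo dropped:    %d\n", drop_own_echo(&own, &priv));
	printf("other VF delivered:  %d\n", !drop_own_echo(&other, &priv));
	return 0;
}

Moving the check after the skb has been built is what makes the GRH source GID available at all; the pre-patch version dropped on LID and QP number alone, which also discarded legitimate multicasts sent by a second VF that happened to use the same QP number.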