summaryrefslogtreecommitdiff
path: root/drivers/staging/rdma/hfi1
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2015-10-08 08:16:33 (GMT)
committerChristoph Hellwig <hch@lst.de>2015-10-08 10:09:10 (GMT)
commite622f2f4ad2142d2a613a57fb85f8cf737935ef5 (patch)
tree19fa458bcaacf3f8b2f5e40676f748afc3df1e84 /drivers/staging/rdma/hfi1
parentb8cab5dab15ff5c2acc3faefdde28919b0341c11 (diff)
downloadlinux-e622f2f4ad2142d2a613a57fb85f8cf737935ef5.tar.xz
IB: split struct ib_send_wr
This patch split up struct ib_send_wr so that all non-trivial verbs use their own structure which embedds struct ib_send_wr. This dramaticly shrinks the size of a WR for most common operations: sizeof(struct ib_send_wr) (old): 96 sizeof(struct ib_send_wr): 48 sizeof(struct ib_rdma_wr): 64 sizeof(struct ib_atomic_wr): 96 sizeof(struct ib_ud_wr): 88 sizeof(struct ib_fast_reg_wr): 88 sizeof(struct ib_bind_mw_wr): 96 sizeof(struct ib_sig_handover_wr): 80 And with Sagi's pending MR rework the fast registration WR will also be down to a reasonable size: sizeof(struct ib_fastreg_wr): 64 Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> [srp, srpt] Reviewed-by: Chuck Lever <chuck.lever@oracle.com> [sunrpc] Tested-by: Haggai Eran <haggaie@mellanox.com> Tested-by: Sagi Grimberg <sagig@mellanox.com> Tested-by: Steve Wise <swise@opengridcomputing.com>
Diffstat (limited to 'drivers/staging/rdma/hfi1')
-rw-r--r--drivers/staging/rdma/hfi1/keys.c22
-rw-r--r--drivers/staging/rdma/hfi1/qp.c2
-rw-r--r--drivers/staging/rdma/hfi1/rc.c24
-rw-r--r--drivers/staging/rdma/hfi1/ruc.c18
-rw-r--r--drivers/staging/rdma/hfi1/uc.c4
-rw-r--r--drivers/staging/rdma/hfi1/ud.c20
-rw-r--r--drivers/staging/rdma/hfi1/verbs.c23
-rw-r--r--drivers/staging/rdma/hfi1/verbs.h10
8 files changed, 73 insertions, 50 deletions
diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c
index f6eff17..82c21b1 100644
--- a/drivers/staging/rdma/hfi1/keys.c
+++ b/drivers/staging/rdma/hfi1/keys.c
@@ -358,12 +358,12 @@ bail:
/*
* Initialize the memory region specified by the work request.
*/
-int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr)
+int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_fast_reg_wr *wr)
{
struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
struct hfi1_mregion *mr;
- u32 rkey = wr->wr.fast_reg.rkey;
+ u32 rkey = wr->rkey;
unsigned i, n, m;
int ret = -EINVAL;
unsigned long flags;
@@ -380,22 +380,22 @@ int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr)
if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
goto bail;
- if (wr->wr.fast_reg.page_list_len > mr->max_segs)
+ if (wr->page_list_len > mr->max_segs)
goto bail;
- ps = 1UL << wr->wr.fast_reg.page_shift;
- if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
+ ps = 1UL << wr->page_shift;
+ if (wr->length > ps * wr->page_list_len)
goto bail;
- mr->user_base = wr->wr.fast_reg.iova_start;
- mr->iova = wr->wr.fast_reg.iova_start;
+ mr->user_base = wr->iova_start;
+ mr->iova = wr->iova_start;
mr->lkey = rkey;
- mr->length = wr->wr.fast_reg.length;
- mr->access_flags = wr->wr.fast_reg.access_flags;
- page_list = wr->wr.fast_reg.page_list->page_list;
+ mr->length = wr->length;
+ mr->access_flags = wr->access_flags;
+ page_list = wr->page_list->page_list;
m = 0;
n = 0;
- for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+ for (i = 0; i < wr->page_list_len; i++) {
mr->map[m]->segs[n].vaddr = (void *) page_list[i];
mr->map[m]->segs[n].length = ps;
if (++n == HFI1_SEGSZ) {
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index df1fa56..f8c3616 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -422,7 +422,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
}
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index 632dd5b..fd0ac60 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c
@@ -404,9 +404,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
goto bail;
}
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
@@ -455,9 +455,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
wqe->lpsn = qp->s_next_psn++;
}
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -488,21 +488,21 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
+ wqe->atomic_wr.swap);
ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
} else {
qp->s_state = OP(FETCH_ADD);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
ohdr->u.atomic_eth.compare_data = 0;
}
ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr >> 32);
+ wqe->atomic_wr.remote_addr >> 32);
ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr);
+ wqe->atomic_wr.remote_addr);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
+ wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
@@ -629,9 +629,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
*/
len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ cpu_to_be64(wqe->rdma_wr.remote_addr + len);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c
index a411528..d614474 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/staging/rdma/hfi1/ruc.c
@@ -481,8 +481,8 @@ again:
if (wqe->length == 0)
break;
if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
qp->r_sge.sg_list = NULL;
@@ -494,8 +494,8 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
release = 0;
@@ -512,18 +512,18 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->wr.wr.atomic.remote_addr,
- wqe->wr.wr.atomic.rkey,
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = wqe->wr.wr.atomic.compare_add;
+ sdata = wqe->atomic_wr.compare_add;
*(u64 *) sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- sdata, wqe->wr.wr.atomic.swap);
+ sdata, wqe->atomic_wr.swap);
hfi1_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
@@ -913,7 +913,7 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c
index b536f39..6095039 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/staging/rdma/hfi1/uc.c
@@ -147,9 +147,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index d40d1a1..5a9c784 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c
@@ -80,7 +80,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
rcu_read_lock();
- qp = hfi1_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+ qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
rcu_read_unlock();
@@ -98,7 +98,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
goto drop;
}
- ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -128,8 +128,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
if (qp->ibqp.qp_num) {
u32 qkey;
- qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
- sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+ qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
+ sqp->qkey : swqe->ud_wr.remote_qkey;
if (unlikely(qkey != qp->qkey)) {
u16 lid;
@@ -234,7 +234,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
if (sqp->ibqp.qp_type == IB_QPT_GSI ||
sqp->ibqp.qp_type == IB_QPT_SMI)
- wc.pkey_index = swqe->wr.wr.ud.pkey_index;
+ wc.pkey_index = swqe->ud_wr.pkey_index;
else
wc.pkey_index = sqp->s_pkey_index;
} else {
@@ -309,7 +309,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
ah_attr->dlid == HFI1_PERMISSIVE_LID) {
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
@@ -401,18 +401,18 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
bth0 |= IB_BTH_SOLICITED;
bth0 |= extra_bytes << 20;
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
- bth0 |= hfi1_get_pkey(ibp, wqe->wr.wr.ud.pkey_index);
+ bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
else
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
- qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+ qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
/* disarm any ahg */
qp->s_hdr->ahgcount = 0;
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index 41bb59e..981e6c1 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -391,7 +391,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
wr->opcode != IB_WR_SEND_WITH_IMM)
return -EINVAL;
/* Check UD destination address PD */
- if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
return -EINVAL;
} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
return -EINVAL;
@@ -412,7 +412,24 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
rkt = &to_idev(qp->ibqp.device)->lk_table;
pd = to_ipd(qp->ibqp.pd);
wqe = get_swqe_ptr(qp, qp->s_head);
- wqe->wr = *wr;
+
+
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+ else if (wr->opcode == IB_WR_FAST_REG_MR)
+ memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr),
+ sizeof(wqe->fast_reg_wr));
+ else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE ||
+ wr->opcode == IB_WR_RDMA_READ)
+ memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+ else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+ else
+ memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
wqe->length = 0;
j = 0;
if (wr->num_sge) {
@@ -438,7 +455,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
if (wqe->length > 0x80000000U)
goto bail_inval_free;
} else {
- struct hfi1_ah *ah = to_iah(wr->wr.ud.ah);
+ struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah);
atomic_inc(&ah->refcount);
}
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index ed903a9..cf5a3c9 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -348,7 +348,13 @@ struct hfi1_mr {
* in qp->s_max_sge.
*/
struct hfi1_swqe {
- struct ib_send_wr wr; /* don't use wr.sg_list */
+ union {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ struct ib_rdma_wr rdma_wr;
+ struct ib_atomic_wr atomic_wr;
+ struct ib_ud_wr ud_wr;
+ struct ib_fast_reg_wr fast_reg_wr;
+ };
u32 psn; /* first packet sequence number */
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
@@ -1025,7 +1031,7 @@ struct ib_fast_reg_page_list *hfi1_alloc_fast_reg_page_list(
void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
-int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr);
+int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_fast_reg_wr *wr);
struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr);