summaryrefslogtreecommitdiff
path: root/drivers/staging/rdma/ipath
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2015-10-08 08:16:33 (GMT)
committerChristoph Hellwig <hch@lst.de>2015-10-08 10:09:10 (GMT)
commite622f2f4ad2142d2a613a57fb85f8cf737935ef5 (patch)
tree19fa458bcaacf3f8b2f5e40676f748afc3df1e84 /drivers/staging/rdma/ipath
parentb8cab5dab15ff5c2acc3faefdde28919b0341c11 (diff)
downloadlinux-e622f2f4ad2142d2a613a57fb85f8cf737935ef5.tar.xz
IB: split struct ib_send_wr
This patch split up struct ib_send_wr so that all non-trivial verbs use their own structure which embedds struct ib_send_wr. This dramaticly shrinks the size of a WR for most common operations: sizeof(struct ib_send_wr) (old): 96 sizeof(struct ib_send_wr): 48 sizeof(struct ib_rdma_wr): 64 sizeof(struct ib_atomic_wr): 96 sizeof(struct ib_ud_wr): 88 sizeof(struct ib_fast_reg_wr): 88 sizeof(struct ib_bind_mw_wr): 96 sizeof(struct ib_sig_handover_wr): 80 And with Sagi's pending MR rework the fast registration WR will also be down to a reasonable size: sizeof(struct ib_fastreg_wr): 64 Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> [srp, srpt] Reviewed-by: Chuck Lever <chuck.lever@oracle.com> [sunrpc] Tested-by: Haggai Eran <haggaie@mellanox.com> Tested-by: Sagi Grimberg <sagig@mellanox.com> Tested-by: Steve Wise <swise@opengridcomputing.com>
Diffstat (limited to 'drivers/staging/rdma/ipath')
-rw-r--r--drivers/staging/rdma/ipath/ipath_rc.c24
-rw-r--r--drivers/staging/rdma/ipath/ipath_ruc.c16
-rw-r--r--drivers/staging/rdma/ipath/ipath_uc.c4
-rw-r--r--drivers/staging/rdma/ipath/ipath_ud.c26
-rw-r--r--drivers/staging/rdma/ipath/ipath_verbs.c20
-rw-r--r--drivers/staging/rdma/ipath/ipath_verbs.h9
6 files changed, 61 insertions, 38 deletions
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
index 79b3dbc..d4aa535 100644
--- a/drivers/staging/rdma/ipath/ipath_rc.c
+++ b/drivers/staging/rdma/ipath/ipath_rc.c
@@ -350,9 +350,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
goto bail;
}
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
@@ -401,9 +401,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
wqe->lpsn = qp->s_next_psn++;
}
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -433,21 +433,21 @@ int ipath_make_rc_req(struct ipath_qp *qp)
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
+ wqe->atomic_wr.swap);
ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
} else {
qp->s_state = OP(FETCH_ADD);
ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
+ wqe->atomic_wr.compare_add);
ohdr->u.atomic_eth.compare_data = 0;
}
ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr >> 32);
+ wqe->atomic_wr.remote_addr >> 32);
ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->wr.wr.atomic.remote_addr);
+ wqe->atomic_wr.remote_addr);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
+ wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
@@ -567,9 +567,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_init_restart(qp, wqe);
len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ cpu_to_be64(wqe->rdma_wr.remote_addr + len);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
index 1f95bba..46af8b0 100644
--- a/drivers/staging/rdma/ipath/ipath_ruc.c
+++ b/drivers/staging/rdma/ipath/ipath_ruc.c
@@ -353,8 +353,8 @@ again:
if (wqe->length == 0)
break;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
break;
@@ -363,8 +363,8 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
qp->r_sge.sge = wqe->sg_list[0];
@@ -377,18 +377,18 @@ again:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
- wqe->wr.wr.atomic.remote_addr,
- wqe->wr.wr.atomic.rkey,
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = wqe->wr.wr.atomic.compare_add;
+ sdata = wqe->atomic_wr.compare_add;
*(u64 *) sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- sdata, wqe->wr.wr.atomic.swap);
+ sdata, wqe->atomic_wr.swap);
goto send_comp;
default:
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
index 22e6099..0246b30 100644
--- a/drivers/staging/rdma/ipath/ipath_uc.c
+++ b/drivers/staging/rdma/ipath/ipath_uc.c
@@ -126,9 +126,9 @@ int ipath_make_uc_req(struct ipath_qp *qp)
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ cpu_to_be64(wqe->rdma_wr.remote_addr);
ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
index e8a2a91..3ffc156 100644
--- a/drivers/staging/rdma/ipath/ipath_ud.c
+++ b/drivers/staging/rdma/ipath/ipath_ud.c
@@ -65,7 +65,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
u32 rlen;
u32 length;
- qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
+ qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
dev->n_pkt_drops++;
goto done;
@@ -77,8 +77,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
* qkey from the QP context instead of the WR (see 10.2.5).
*/
if (unlikely(qp->ibqp.qp_num &&
- ((int) swqe->wr.wr.ud.remote_qkey < 0 ?
- sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) {
+ ((int) swqe->ud_wr.remote_qkey < 0 ?
+ sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
/* XXX OK to lose a count once in a while. */
dev->qkey_violations++;
dev->n_pkt_drops++;
@@ -175,7 +175,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
} else
spin_unlock_irqrestore(&rq->lock, flags);
- ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
if (ah_attr->ah_flags & IB_AH_GRH) {
ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
wc.wc_flags |= IB_WC_GRH;
@@ -225,7 +225,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
- swqe->wr.send_flags & IB_SEND_SOLICITED);
+ swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
drop:
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
@@ -280,7 +280,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
next_cur = 0;
/* Construct the header. */
- ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
dev->n_multicast_xmit++;
@@ -322,7 +322,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
- qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
@@ -340,9 +340,9 @@ int ipath_make_ud_req(struct ipath_qp *qp)
lrh0 = IPATH_LRH_BTH;
ohdr = &qp->s_hdr.u.oth;
}
- if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
- ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
+ ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
@@ -360,7 +360,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
qp->s_hdr.lrh[3] = cpu_to_be16(lid);
} else
qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
- if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
@@ -372,14 +372,14 @@ int ipath_make_ud_req(struct ipath_qp *qp)
ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
ah_attr->dlid != IPATH_PERMISSIVE_LID ?
cpu_to_be32(IPATH_MULTICAST_QPN) :
- cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
- qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+ qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
index ed2bbc2..15633ec 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ b/drivers/staging/rdma/ipath/ipath_verbs.c
@@ -374,7 +374,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
wr->opcode != IB_WR_SEND_WITH_IMM)
goto bail_inval;
/* Check UD destination address PD */
- if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
goto bail_inval;
} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
goto bail_inval;
@@ -395,7 +395,23 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
}
wqe = get_swqe_ptr(qp, qp->s_head);
- wqe->wr = *wr;
+
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+ else if (wr->opcode == IB_WR_FAST_REG_MR)
+ memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr),
+ sizeof(wqe->fast_reg_wr));
+ else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE ||
+ wr->opcode == IB_WR_RDMA_READ)
+ memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+ else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+ else
+ memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
wqe->length = 0;
if (wr->num_sge) {
acc = wr->opcode >= IB_WR_RDMA_READ ?
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
index ec167e5..ed102a2 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ b/drivers/staging/rdma/ipath/ipath_verbs.h
@@ -277,7 +277,14 @@ struct ipath_mr {
* in qp->s_max_sge.
*/
struct ipath_swqe {
- struct ib_send_wr wr; /* don't use wr.sg_list */
+ union {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ struct ib_ud_wr ud_wr;
+ struct ib_fast_reg_wr fast_reg_wr;
+ struct ib_rdma_wr rdma_wr;
+ struct ib_atomic_wr atomic_wr;
+ };
+
u32 psn; /* first packet sequence number */
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */