summary | refs | log | tree | commit | diff
path: root/drivers/staging/lustre/lnet/klnds
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lnet/klnds')
-rw-r--r-- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 16
-rw-r--r-- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 18
-rw-r--r-- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 201
-rw-r--r-- drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c | 5
-rw-r--r-- drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h | 8
-rw-r--r-- drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c | 48
-rw-r--r-- drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 207
7 files changed, 184 insertions, 319 deletions
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 4f5978b..c7a5d49 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -128,6 +128,7 @@ static int kiblnd_msgtype2size(int type)
static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
{
struct kib_rdma_desc *rd;
+ int msg_size;
int nob;
int n;
int i;
@@ -146,12 +147,6 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
n = rd->rd_nfrags;
- if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
- CERROR("Bad nfrags: %d, should be 0 < n <= %d\n",
- n, IBLND_MAX_RDMA_FRAGS);
- return 1;
- }
-
nob = offsetof(struct kib_msg, ibm_u) +
kiblnd_rd_msg_size(rd, msg->ibm_type, n);
@@ -161,6 +156,13 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
return 1;
}
+ msg_size = kiblnd_rd_size(rd);
+ if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
+ CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
+ msg_size, LNET_MAX_PAYLOAD);
+ return 1;
+ }
+
if (!flip)
return 0;
@@ -618,7 +620,7 @@ static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt)
}
struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
- int state, int version)
+ int state, int version)
{
/*
* CAVEAT EMPTOR:
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 078a0c3..1457697 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -113,8 +113,9 @@ extern struct kib_tunables kiblnd_tunables;
#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */
+#define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */
+#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
+#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */
/************************/
/* derived constants... */
@@ -133,8 +134,8 @@ extern struct kib_tunables kiblnd_tunables;
/* WRs and CQEs (per connection) */
#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
#define IBLND_SEND_WRS(c) \
- ((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \
- c->ibc_peer->ibp_ni))
+ (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \
+ kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni))
#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
struct kib_hca_dev;
@@ -582,6 +583,8 @@ struct kib_peer {
unsigned short ibp_connecting;
/* reconnect this peer later */
unsigned short ibp_reconnecting:1;
+ /* counter of how many times we triggered a conn race */
+ unsigned char ibp_races;
/* # consecutive reconnection attempts to this peer */
unsigned int ibp_reconnected;
/* errno on closing this peer */
@@ -607,14 +610,14 @@ kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
mod = tunables->lnd_map_on_demand;
- return mod ? mod : IBLND_MAX_RDMA_FRAGS;
+ return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT;
}
static inline int
kiblnd_rdma_frags(int version, struct lnet_ni *ni)
{
return version == IBLND_MSG_VERSION_1 ?
- IBLND_MAX_RDMA_FRAGS :
+ (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) :
kiblnd_cfg_rdma_frags(ni);
}
@@ -1034,5 +1037,4 @@ int kiblnd_post_rx(struct kib_rx *rx, int credit);
int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
+ struct iov_iter *to, unsigned int rlen);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 596a697..b27de88 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -36,16 +36,19 @@
#include "o2iblnd.h"
+#define MAX_CONN_RACES_BEFORE_ABORT 20
+
static void kiblnd_peer_alive(struct kib_peer *peer);
static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error);
-static void kiblnd_check_sends(struct kib_conn *conn);
static void kiblnd_init_tx_msg(lnet_ni_t *ni, struct kib_tx *tx,
- int type, int body_nob);
+ int type, int body_nob);
static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
- int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie);
+ int resid, struct kib_rdma_desc *dstrd,
+ __u64 dstcookie);
static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn);
static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn);
static void kiblnd_unmap_tx(lnet_ni_t *ni, struct kib_tx *tx);
+static void kiblnd_check_sends_locked(struct kib_conn *conn);
static void
kiblnd_tx_done(lnet_ni_t *ni, struct kib_tx *tx)
@@ -211,9 +214,9 @@ kiblnd_post_rx(struct kib_rx *rx, int credit)
conn->ibc_outstanding_credits++;
else
conn->ibc_reserved_credits++;
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
out:
kiblnd_conn_decref(conn);
return rc;
@@ -344,8 +347,8 @@ kiblnd_handle_rx(struct kib_rx *rx)
!IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
conn->ibc_outstanding_credits++;
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
}
switch (msg->ibm_type) {
@@ -648,7 +651,7 @@ static int kiblnd_map_tx(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc
static int
kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
- unsigned int niov, struct kvec *iov, int offset, int nob)
+ unsigned int niov, const struct kvec *iov, int offset, int nob)
{
struct kib_net *net = ni->ni_data;
struct page *page;
@@ -705,7 +708,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
static int
kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
+ int nkiov, const lnet_kiov_t *kiov, int offset, int nob)
{
struct kib_net *net = ni->ni_data;
struct scatterlist *sg;
@@ -717,8 +720,8 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
LASSERT(nkiov > 0);
LASSERT(net);
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
+ while (offset >= kiov->bv_len) {
+ offset -= kiov->bv_len;
nkiov--;
kiov++;
LASSERT(nkiov > 0);
@@ -728,10 +731,10 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
do {
LASSERT(nkiov > 0);
- fragnob = min((int)(kiov->kiov_len - offset), nob);
+ fragnob = min((int)(kiov->bv_len - offset), nob);
- sg_set_page(sg, kiov->kiov_page, fragnob,
- kiov->kiov_offset + offset);
+ sg_set_page(sg, kiov->bv_page, fragnob,
+ kiov->bv_offset + offset);
sg = sg_next(sg);
if (!sg) {
CERROR("lacking enough sg entries to map tx\n");
@@ -761,7 +764,6 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
LASSERT(tx->tx_queued);
/* We rely on this for QP sizing */
LASSERT(tx->tx_nwrq > 0);
- LASSERT(tx->tx_nwrq <= 1 + conn->ibc_max_frags);
LASSERT(!credit || credit == 1);
LASSERT(conn->ibc_outstanding_credits >= 0);
@@ -800,7 +802,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
/*
* OK to drop when posted enough NOOPs, since
- * kiblnd_check_sends will queue NOOP again when
+ * kiblnd_check_sends_locked will queue NOOP again when
* posted NOOPs complete
*/
spin_unlock(&conn->ibc_lock);
@@ -905,7 +907,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
}
static void
-kiblnd_check_sends(struct kib_conn *conn)
+kiblnd_check_sends_locked(struct kib_conn *conn)
{
int ver = conn->ibc_version;
lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
@@ -918,8 +920,6 @@ kiblnd_check_sends(struct kib_conn *conn)
return;
}
- spin_lock(&conn->ibc_lock);
-
LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
LASSERT(!IBLND_OOB_CAPABLE(ver) ||
conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
@@ -969,8 +969,6 @@ kiblnd_check_sends(struct kib_conn *conn)
if (kiblnd_post_tx_locked(conn, tx, credit))
break;
}
-
- spin_unlock(&conn->ibc_lock);
}
static void
@@ -1016,16 +1014,11 @@ kiblnd_tx_complete(struct kib_tx *tx, int status)
if (idle)
list_del(&tx->tx_list);
- kiblnd_conn_addref(conn); /* 1 ref for me.... */
-
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
if (idle)
kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-
- kiblnd_check_sends(conn);
-
- kiblnd_conn_decref(conn); /* ...until here */
}
static void
@@ -1078,6 +1071,15 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
LASSERT(type == IBLND_MSG_GET_DONE ||
type == IBLND_MSG_PUT_DONE);
+ if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) {
+ CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid),
+ conn->ibc_max_frags << PAGE_SHIFT,
+ kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd));
+ rc = -EMSGSIZE;
+ goto too_big;
+ }
+
while (resid > 0) {
if (srcidx >= srcrd->rd_nfrags) {
CERROR("Src buffer exhausted: %d frags\n", srcidx);
@@ -1091,10 +1093,10 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
break;
}
- if (tx->tx_nwrq >= conn->ibc_max_frags) {
+ if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) {
CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_max_frags,
+ IBLND_MAX_RDMA_FRAGS,
srcidx, srcrd->rd_nfrags,
dstidx, dstrd->rd_nfrags);
rc = -EMSGSIZE;
@@ -1132,7 +1134,7 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
wrq++;
sge++;
}
-
+too_big:
if (rc < 0) /* no RDMA if completing with failure */
tx->tx_nwrq = 0;
@@ -1204,9 +1206,8 @@ kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
{
spin_lock(&conn->ibc_lock);
kiblnd_queue_tx_locked(tx, conn);
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
-
- kiblnd_check_sends(conn);
}
static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
@@ -1499,6 +1500,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
unsigned int payload_offset = lntmsg->msg_offset;
unsigned int payload_nob = lntmsg->msg_len;
+ struct iov_iter from;
struct kib_msg *ibmsg;
struct kib_rdma_desc *rd;
struct kib_tx *tx;
@@ -1518,6 +1520,17 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
/* payload is either all vaddrs or all pages */
LASSERT(!(payload_kiov && payload_iov));
+ if (payload_kiov)
+ iov_iter_bvec(&from, ITER_BVEC | WRITE,
+ payload_kiov, payload_niov,
+ payload_nob + payload_offset);
+ else
+ iov_iter_kvec(&from, ITER_KVEC | WRITE,
+ payload_iov, payload_niov,
+ payload_nob + payload_offset);
+
+ iov_iter_advance(&from, payload_offset);
+
switch (type) {
default:
LBUG();
@@ -1637,17 +1650,8 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
ibmsg = tx->tx_msg;
ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
- if (payload_kiov)
- lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
+ copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, IBLND_MSG_SIZE,
+ &from);
nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
@@ -1719,8 +1723,7 @@ kiblnd_reply(lnet_ni_t *ni, struct kib_rx *rx, lnet_msg_t *lntmsg)
int
kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
+ struct iov_iter *to, unsigned int rlen)
{
struct kib_rx *rx = private;
struct kib_msg *rxmsg = rx->rx_msg;
@@ -1730,10 +1733,9 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
int post_credit = IBLND_POSTRX_PEER_CREDIT;
int rc = 0;
- LASSERT(mlen <= rlen);
+ LASSERT(iov_iter_count(to) <= rlen);
LASSERT(!in_interrupt());
/* Either all pages or all vaddrs */
- LASSERT(!(kiov && iov));
switch (rxmsg->ibm_type) {
default:
@@ -1749,16 +1751,8 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
break;
}
- if (kiov)
- lnet_copy_flat2kiov(niov, kiov, offset,
- IBLND_MSG_SIZE, rxmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- mlen);
- else
- lnet_copy_flat2iov(niov, iov, offset,
- IBLND_MSG_SIZE, rxmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- mlen);
+ copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload,
+ IBLND_MSG_SIZE, to);
lnet_finalize(ni, lntmsg, 0);
break;
@@ -1766,7 +1760,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
struct kib_msg *txmsg;
struct kib_rdma_desc *rd;
- if (!mlen) {
+ if (!iov_iter_count(to)) {
lnet_finalize(ni, lntmsg, 0);
kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
rxmsg->ibm_u.putreq.ibprm_cookie);
@@ -1784,12 +1778,16 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
txmsg = tx->tx_msg;
rd = &txmsg->ibm_u.putack.ibpam_rd;
- if (!kiov)
+ if (!(to->type & ITER_BVEC))
rc = kiblnd_setup_rd_iov(ni, tx, rd,
- niov, iov, offset, mlen);
+ to->nr_segs, to->kvec,
+ to->iov_offset,
+ iov_iter_count(to));
else
rc = kiblnd_setup_rd_kiov(ni, tx, rd,
- niov, kiov, offset, mlen);
+ to->nr_segs, to->bvec,
+ to->iov_offset,
+ iov_iter_count(to));
if (rc) {
CERROR("Can't setup PUT sink for %s: %d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
@@ -2183,14 +2181,11 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
return;
}
- /**
- * refcount taken by cmid is not reliable after I released the glock
- * because this connection is visible to other threads now, another
- * thread can find and close this connection right after I released
- * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is
- * called, it can release the connection refcount taken by cmid.
- * It means the connection could be destroyed before I finish my
- * operations on it.
+ /*
+ * Take +1 ref for myself: this connection is now visible to other
+ * threads, so the refcount of peer:ibp_conns can be released by a
+ * connection close, either from a different thread or from the call
+ * to kiblnd_check_sends_locked() below. See bz21911 for details.
*/
kiblnd_conn_addref(conn);
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
@@ -2202,10 +2197,9 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
kiblnd_queue_tx_locked(tx, conn);
}
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
-
/* schedule blocked rxs */
kiblnd_handle_early_rxs(conn);
@@ -2240,6 +2234,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
struct kib_rej rej;
int version = IBLND_MSG_VERSION;
unsigned long flags;
+ int max_frags;
int rc;
struct sockaddr_in *peer_addr;
@@ -2346,22 +2341,20 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
goto failed;
}
- if (reqmsg->ibm_u.connparams.ibcp_max_frags >
- kiblnd_rdma_frags(version, ni)) {
- CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
- libcfs_nid2str(nid), version,
- reqmsg->ibm_u.connparams.ibcp_max_frags,
+ max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
+ if (max_frags > kiblnd_rdma_frags(version, ni)) {
+ CWARN("Can't accept conn from %s (version %x): max message size %d is too large (%d wanted)\n",
+ libcfs_nid2str(nid), version, max_frags,
kiblnd_rdma_frags(version, ni));
if (version >= IBLND_MSG_VERSION)
rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
goto failed;
- } else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
- kiblnd_rdma_frags(version, ni) && !net->ibn_fmr_ps) {
- CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
- libcfs_nid2str(nid), version,
- reqmsg->ibm_u.connparams.ibcp_max_frags,
+ } else if (max_frags < kiblnd_rdma_frags(version, ni) &&
+ !net->ibn_fmr_ps) {
+ CWARN("Can't accept conn from %s (version %x): max message size %d incompatible without FMR pool (%d wanted)\n",
+ libcfs_nid2str(nid), version, max_frags,
kiblnd_rdma_frags(version, ni));
if (version == IBLND_MSG_VERSION)
@@ -2387,7 +2380,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
}
/* We have validated the peer's parameters so use those */
- peer->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags;
+ peer->ibp_max_frags = max_frags;
peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
write_lock_irqsave(g_lock, flags);
@@ -2419,23 +2412,37 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
goto failed;
}
- /* tie-break connection race in favour of the higher NID */
+ /*
+ * Tie-break connection race in favour of the higher NID.
+ * If we keep running into a race condition multiple times,
+ * we have to assume that the connection attempt with the
+ * higher NID is stuck in a connecting state and will never
+ * recover. As such, we pass through this if-block and let
+ * the lower NID connection win so we can move forward.
+ */
if (peer2->ibp_connecting &&
- nid < ni->ni_nid) {
+ nid < ni->ni_nid && peer2->ibp_races <
+ MAX_CONN_RACES_BEFORE_ABORT) {
+ peer2->ibp_races++;
write_unlock_irqrestore(g_lock, flags);
- CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid));
+ CDEBUG(D_NET, "Conn race %s\n",
+ libcfs_nid2str(peer2->ibp_nid));
kiblnd_peer_decref(peer);
rej.ibr_why = IBLND_REJECT_CONN_RACE;
goto failed;
}
-
+ if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
+ CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
+ libcfs_nid2str(peer2->ibp_nid),
+ MAX_CONN_RACES_BEFORE_ABORT);
/**
* passive connection is allowed even this peer is waiting for
* reconnection.
*/
peer2->ibp_reconnecting = 0;
+ peer2->ibp_races = 0;
peer2->ibp_accepting++;
kiblnd_peer_addref(peer2);
@@ -2494,7 +2501,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
sizeof(ackmsg->ibm_u.connparams));
ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
+ ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
@@ -2526,9 +2533,9 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
failed:
if (ni) {
- lnet_ni_decref(ni);
rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+ lnet_ni_decref(ni);
}
rej.ibr_version = version;
@@ -2556,7 +2563,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
if (cp) {
msg_size = cp->ibcp_max_msg_size;
- frag_num = cp->ibcp_max_frags;
+ frag_num = cp->ibcp_max_frags << IBLND_FRAG_SHIFT;
queue_dep = cp->ibcp_queue_depth;
}
@@ -2821,11 +2828,11 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
goto failed;
}
- if (msg->ibm_u.connparams.ibcp_max_frags >
+ if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) >
conn->ibc_max_frags) {
CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_frags,
+ msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT,
conn->ibc_max_frags);
rc = -EPROTO;
goto failed;
@@ -2859,7 +2866,7 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags;
+ conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
@@ -2916,7 +2923,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
memset(msg, 0, sizeof(*msg));
kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
+ msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
kiblnd_pack_msg(peer->ibp_ni, msg, version,
@@ -3233,7 +3240,11 @@ kiblnd_check_conns(int idx)
*/
list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
list_del(&conn->ibc_connd_list);
- kiblnd_check_sends(conn);
+
+ spin_lock(&conn->ibc_lock);
+ kiblnd_check_sends_locked(conn);
+ spin_unlock(&conn->ibc_lock);
+
kiblnd_conn_decref(conn);
}
}
@@ -3419,6 +3430,12 @@ kiblnd_qp_event(struct ib_event *event, void *arg)
case IB_EVENT_COMM_EST:
CDEBUG(D_NET, "%s established\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ /*
+ * We received a packet but the connection isn't established yet,
+ * probably because the handshake packet was lost, so it is safe
+ * to force the connection into the established state.
+ */
+ rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
return;
default:
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index 07ec540..cbc9a9c 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -1468,11 +1468,6 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
conn->ksnc_route = NULL;
-#if 0 /* irrelevant with only eager routes */
- /* make route least favourite */
- list_del(&route->ksnr_list);
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-#endif
ksocknal_route_decref(route); /* drop conn's ref on route */
}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
index a56632b..e6ca0cf 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -86,8 +86,6 @@ struct ksock_sched { /* per scheduler state */
int kss_nconns; /* # connections assigned to
* this scheduler */
struct ksock_sched_info *kss_info; /* owner of it */
- struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
- struct kvec kss_scratch_iov[LNET_MAX_IOV];
};
struct ksock_sched_info {
@@ -616,9 +614,7 @@ void ksocknal_shutdown(lnet_ni_t *ni);
int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
int ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
+ int delayed, struct iov_iter *to, unsigned int rlen);
int ksocknal_accept(lnet_ni_t *ni, struct socket *sock);
int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
@@ -635,7 +631,7 @@ int ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr);
struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
- struct ksock_tx *tx, int nonblk);
+ struct ksock_tx *tx, int nonblk);
int ksocknal_launch_packet(lnet_ni_t *ni, struct ksock_tx *tx,
lnet_process_id_t id);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index 303576d..c1c6f60 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -35,8 +35,8 @@ ksocknal_alloc_tx(int type, int size)
spin_lock(&ksocknal_data.ksnd_tx_lock);
if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- tx = list_entry(ksocknal_data.ksnd_idle_noop_txs. \
- next, struct ksock_tx, tx_list);
+ tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
+ struct ksock_tx, tx_list);
LASSERT(tx->tx_desc_size == size);
list_del(&tx->tx_list);
}
@@ -164,13 +164,13 @@ ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
do {
LASSERT(tx->tx_nkiov > 0);
- if (nob < (int)kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
+ if (nob < (int)kiov->bv_len) {
+ kiov->bv_offset += nob;
+ kiov->bv_len -= nob;
return rc;
}
- nob -= (int)kiov->kiov_len;
+ nob -= (int)kiov->bv_len;
tx->tx_kiov = ++kiov;
tx->tx_nkiov--;
} while (nob);
@@ -326,13 +326,13 @@ ksocknal_recv_kiov(struct ksock_conn *conn)
do {
LASSERT(conn->ksnc_rx_nkiov > 0);
- if (nob < (int)kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
+ if (nob < (int)kiov->bv_len) {
+ kiov->bv_offset += nob;
+ kiov->bv_len -= nob;
return -EAGAIN;
}
- nob -= kiov->kiov_len;
+ nob -= kiov->bv_len;
conn->ksnc_rx_kiov = ++kiov;
conn->ksnc_rx_nkiov--;
} while (nob);
@@ -1325,39 +1325,36 @@ ksocknal_process_receive(struct ksock_conn *conn)
int
ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
+ struct iov_iter *to, unsigned int rlen)
{
struct ksock_conn *conn = private;
struct ksock_sched *sched = conn->ksnc_scheduler;
- LASSERT(mlen <= rlen);
- LASSERT(niov <= LNET_MAX_IOV);
+ LASSERT(iov_iter_count(to) <= rlen);
+ LASSERT(to->nr_segs <= LNET_MAX_IOV);
conn->ksnc_cookie = msg;
- conn->ksnc_rx_nob_wanted = mlen;
+ conn->ksnc_rx_nob_wanted = iov_iter_count(to);
conn->ksnc_rx_nob_left = rlen;
- if (!mlen || iov) {
+ if (to->type & ITER_KVEC) {
conn->ksnc_rx_nkiov = 0;
conn->ksnc_rx_kiov = NULL;
conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
conn->ksnc_rx_niov =
lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
- niov, iov, offset, mlen);
+ to->nr_segs, to->kvec,
+ to->iov_offset, iov_iter_count(to));
} else {
conn->ksnc_rx_niov = 0;
conn->ksnc_rx_iov = NULL;
conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
conn->ksnc_rx_nkiov =
lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
- niov, kiov, offset, mlen);
+ to->nr_segs, to->bvec,
+ to->iov_offset, iov_iter_count(to));
}
- LASSERT(mlen ==
- lnet_iov_nob(conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
- lnet_kiov_nob(conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
-
LASSERT(conn->ksnc_rx_scheduled);
spin_lock_bh(&sched->kss_lock);
@@ -2008,13 +2005,6 @@ ksocknal_connect(struct ksock_route *route)
list_splice_init(&peer->ksnp_tx_queue, &zombies);
}
-#if 0 /* irrelevant with only eager routes */
- if (!route->ksnr_deleted) {
- /* make this route least-favourite for re-selection */
- list_del(&route->ksnr_list);
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
- }
-#endif
write_unlock_bh(&ksocknal_data.ksnd_global_lock);
ksocknal_peer_failed(peer);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
index 6a17757..6c95e98 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
@@ -73,9 +73,9 @@ ksocknal_lib_zc_capable(struct ksock_conn *conn)
int
ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
{
+ struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
struct socket *sock = conn->ksnc_sock;
- int nob;
- int rc;
+ int nob, i;
if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
@@ -83,34 +83,16 @@ ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
!tx->tx_msg.ksm_csum) /* not checksummed */
ksocknal_lib_csum_tx(tx);
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
- {
-#if SOCKNAL_SINGLE_FRAG_TX
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- unsigned int niov = tx->tx_niov;
-#endif
- struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
- int i;
+ for (nob = i = 0; i < tx->tx_niov; i++)
+ nob += tx->tx_iov[i].iov_len;
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = tx->tx_iov[i];
- nob += scratchiov[i].iov_len;
- }
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ nob < tx->tx_resid)
+ msg.msg_flags |= MSG_MORE;
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- rc = kernel_sendmsg(sock, &msg, scratchiov, niov, nob);
- }
- return rc;
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
+ tx->tx_iov, tx->tx_niov, nob);
+ return sock_sendmsg(sock, &msg);
}
int
@@ -124,20 +106,16 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
/* Not NOOP message */
LASSERT(tx->tx_lnetmsg);
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
if (tx->tx_msg.ksm_zc_cookies[0]) {
/* Zero copy is enabled */
struct sock *sk = sock->sk;
- struct page *page = kiov->kiov_page;
- int offset = kiov->kiov_offset;
- int fragsize = kiov->kiov_len;
+ struct page *page = kiov->bv_page;
+ int offset = kiov->bv_offset;
+ int fragsize = kiov->bv_len;
int msgflg = MSG_DONTWAIT;
CDEBUG(D_NET, "page %p + offset %x for %d\n",
- page, offset, kiov->kiov_len);
+ page, offset, kiov->bv_len);
if (!list_empty(&conn->ksnc_tx_queue) ||
fragsize < tx->tx_resid)
@@ -150,34 +128,19 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
}
} else {
-#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- unsigned int niov = tx->tx_nkiov;
-#endif
struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
int i;
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
+ for (nob = i = 0; i < tx->tx_nkiov; i++)
+ nob += kiov[i].bv_len;
if (!list_empty(&conn->ksnc_tx_queue) ||
nob < tx->tx_resid)
msg.msg_flags |= MSG_MORE;
- rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
-
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
+ iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
+ kiov, tx->tx_nkiov, nob);
+ rc = sock_sendmsg(sock, &msg);
}
return rc;
}
@@ -201,14 +164,7 @@ ksocknal_lib_eager_ack(struct ksock_conn *conn)
int
ksocknal_lib_recv_iov(struct ksock_conn *conn)
{
-#if SOCKNAL_SINGLE_FRAG_RX
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
-#endif
struct kvec *iov = conn->ksnc_rx_iov;
struct msghdr msg = {
.msg_flags = 0
@@ -220,20 +176,15 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
int sum;
__u32 saved_csum;
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
LASSERT(niov > 0);
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = iov[i];
- nob += scratchiov[i].iov_len;
- }
+ for (nob = i = 0; i < niov; i++)
+ nob += iov[i].iov_len;
+
LASSERT(nob <= conn->ksnc_rx_nob_wanted);
- rc = kernel_recvmsg(conn->ksnc_sock, &msg, scratchiov, niov, nob,
- MSG_DONTWAIT);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob);
+ rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
saved_csum = 0;
if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
@@ -259,67 +210,10 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
return rc;
}
-static void
-ksocknal_lib_kiov_vunmap(void *addr)
-{
- if (!addr)
- return;
-
- vunmap(addr);
-}
-
-static void *
-ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
- struct kvec *iov, struct page **pages)
-{
- void *addr;
- int nob;
- int i;
-
- if (!*ksocknal_tunables.ksnd_zc_recv || !pages)
- return NULL;
-
- LASSERT(niov <= LNET_MAX_IOV);
-
- if (niov < 2 ||
- niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
- return NULL;
-
- for (nob = i = 0; i < niov; i++) {
- if ((kiov[i].kiov_offset && i > 0) ||
- (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_SIZE && i < niov - 1))
- return NULL;
-
- pages[i] = kiov[i].kiov_page;
- nob += kiov[i].kiov_len;
- }
-
- addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
- if (!addr)
- return NULL;
-
- iov->iov_base = addr + kiov[0].kiov_offset;
- iov->iov_len = nob;
-
- return addr;
-}
-
int
ksocknal_lib_recv_kiov(struct ksock_conn *conn)
{
-#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- struct page **pages = NULL;
- unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs;
unsigned int niov = conn->ksnc_rx_nkiov;
-#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
struct msghdr msg = {
.msg_flags = 0
@@ -328,63 +222,32 @@ ksocknal_lib_recv_kiov(struct ksock_conn *conn)
int i;
int rc;
void *base;
- void *addr;
int sum;
int fragnob;
- int n;
-
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
- addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages);
- if (addr) {
- nob = scratchiov[0].iov_len;
- n = 1;
- } else {
- for (nob = i = 0; i < niov; i++) {
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- }
- n = niov;
- }
+ for (nob = i = 0; i < niov; i++)
+ nob += kiov[i].bv_len;
LASSERT(nob <= conn->ksnc_rx_nob_wanted);
- rc = kernel_recvmsg(conn->ksnc_sock, &msg, (struct kvec *)scratchiov,
- n, nob, MSG_DONTWAIT);
+ iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob);
+ rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
if (conn->ksnc_msg.ksm_csum) {
for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
LASSERT(i < niov);
- /*
- * Dang! have to kmap again because I have nowhere to
- * stash the mapped address. But by doing it while the
- * page is still mapped, the kernel just bumps the map
- * count and returns me the address it stashed.
- */
- base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- fragnob = kiov[i].kiov_len;
+ base = kmap(kiov[i].bv_page) + kiov[i].bv_offset;
+ fragnob = kiov[i].bv_len;
if (fragnob > sum)
fragnob = sum;
conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
base, fragnob);
- kunmap(kiov[i].kiov_page);
+ kunmap(kiov[i].bv_page);
}
}
-
- if (addr) {
- ksocknal_lib_kiov_vunmap(addr);
- } else {
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
- }
-
return rc;
}
@@ -406,12 +269,12 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx)
if (tx->tx_kiov) {
for (i = 0; i < tx->tx_nkiov; i++) {
- base = kmap(tx->tx_kiov[i].kiov_page) +
- tx->tx_kiov[i].kiov_offset;
+ base = kmap(tx->tx_kiov[i].bv_page) +
+ tx->tx_kiov[i].bv_offset;
- csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
+ csum = ksocknal_csum(csum, base, tx->tx_kiov[i].bv_len);
- kunmap(tx->tx_kiov[i].kiov_page);
+ kunmap(tx->tx_kiov[i].bv_page);
}
} else {
for (i = 1; i < tx->tx_niov; i++)