summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-24 21:39:20 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-24 21:39:20 (GMT)
commit5d22c5ab85e4de3c14d9e79eefc2835f4a8f9b2a (patch)
tree2aeaa4798bf856331609d90e5ccbb478014a66ce
parent0e01df100b6bf22a1de61b66657502a6454153c5 (diff)
parentc0cb8bf3a8e4bd82e640862cdd8891400405cb89 (diff)
downloadlinux-5d22c5ab85e4de3c14d9e79eefc2835f4a8f9b2a.tar.xz
Merge tag 'nfsd-4.7' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields: "A very quiet cycle for nfsd, mainly just an RDMA update from Chuck Lever" * tag 'nfsd-4.7' of git://linux-nfs.org/~bfields/linux: sunrpc: fix stripping of padded MIC tokens svcrpc: autoload rdma module svcrdma: Generalize svc_rdma_xdr_decode_req() svcrdma: Eliminate code duplication in svc_rdma_recvfrom() svcrdma: Drain QP before freeing svcrdma_xprt svcrdma: Post Receives only for forward channel requests svcrdma: Remove superfluous line from rdma_read_chunks() svcrdma: svc_rdma_put_context() is invoked twice in Send error path svcrdma: Do not add XDR padding to xdr_buf page vector svcrdma: Support IPv6 with NFS/RDMA nfsd: handle seqid wraparound in nfsd4_preprocess_layout_stateid Remove unnecessary allocation
-rw-r--r--fs/nfsd/nfs3xdr.c2
-rw-r--r--fs/nfsd/nfs4layouts.c2
-rw-r--r--fs/nfsd/nfs4state.c8
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--include/linux/sunrpc/svc_rdma.h2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c9
-rw-r--r--net/sunrpc/svc_xprt.c23
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c32
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c34
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c28
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c17
11 files changed, 90 insertions, 72 deletions
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 93d5853..dba2ff8 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -379,7 +379,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
*/
hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
- - hdr;
+ + rqstp->rq_arg.tail[0].iov_len - hdr;
/*
* Round the length of the data which was specified up to
* the next multiple of XDR units and then compare that
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 825c7bc..953c075 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -289,7 +289,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
status = nfserr_bad_stateid;
mutex_lock(&ls->ls_mutex);
- if (stateid->si_generation > stid->sc_stateid.si_generation)
+ if (nfsd4_stateid_generation_after(stateid, &stid->sc_stateid))
goto out_unlock_stid;
if (layout_type != ls->ls_layout_type)
goto out_unlock_stid;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0462eed..f5f82e1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4651,12 +4651,6 @@ grace_disallows_io(struct net *net, struct inode *inode)
return opens_in_grace(net) && mandatory_lock(inode);
}
-/* Returns true iff a is later than b: */
-static bool stateid_generation_after(stateid_t *a, stateid_t *b)
-{
- return (s32)(a->si_generation - b->si_generation) > 0;
-}
-
static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
{
/*
@@ -4670,7 +4664,7 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s
return nfs_ok;
/* If the client sends us a stateid from the future, it's buggy: */
- if (stateid_generation_after(in, ref))
+ if (nfsd4_stateid_generation_after(in, ref))
return nfserr_bad_stateid;
/*
* However, we could see a stateid from the past, even from a
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index c050c53..986e51e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -573,6 +573,11 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_SEQUENCE,
};
+/* Returns true iff a is later than b: */
+static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
+{
+ return (s32)(a->si_generation - b->si_generation) > 0;
+}
struct nfsd4_compound_state;
struct nfsd_net;
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 3081339..d6917b8 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -199,7 +199,7 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
struct xdr_buf *rcvbuf);
/* svc_rdma_marshal.c */
-extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg *, struct svc_rqst *);
+extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
struct rpcrdma_msg *,
enum rpcrdma_errcode, __be32 *);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1095be9..e085f5a 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -569,10 +569,9 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle)
struct rsc *found;
memset(&rsci, 0, sizeof(rsci));
- if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
- return NULL;
+ rsci.handle.data = handle->data;
+ rsci.handle.len = handle->len;
found = rsc_lookup(cd, &rsci);
- rsc_free(&rsci);
if (!found)
return NULL;
if (cache_check(cd, &found->h, NULL))
@@ -857,8 +856,8 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
goto out;
if (svc_getnl(&buf->head[0]) != seq)
goto out;
- /* trim off the mic at the end before returning */
- xdr_buf_trim(buf, mic.len + 4);
+ /* trim off the mic and padding at the end before returning */
+ xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
stat = 0;
out:
kfree(mic.data);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 7422f28..f5572e3 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -244,13 +244,12 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
svc_xprt_received(new);
}
-int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags)
{
struct svc_xprt_class *xcl;
- dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
spin_lock(&svc_xprt_class_lock);
list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
struct svc_xprt *newxprt;
@@ -274,12 +273,28 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
}
err:
spin_unlock(&svc_xprt_class_lock);
- dprintk("svc: transport %s not found\n", xprt_name);
-
/* This errno is exposed to user space. Provide a reasonable
* perror msg for a bad transport. */
return -EPROTONOSUPPORT;
}
+
+int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, const int family,
+ const unsigned short port, int flags)
+{
+ int err;
+
+ dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
+ err = _svc_create_xprt(serv, xprt_name, net, family, port, flags);
+ if (err == -EPROTONOSUPPORT) {
+ request_module("svc%s", xprt_name);
+ err = _svc_create_xprt(serv, xprt_name, net, family, port, flags);
+ }
+ if (err)
+ dprintk("svc: transport %s not found, err %d\n",
+ xprt_name, err);
+ return err;
+}
EXPORT_SYMBOL_GPL(svc_create_xprt);
/*
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 765bca4..0ba9887 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -145,19 +145,32 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
return (__be32 *)&ary->wc_array[nchunks];
}
-int svc_rdma_xdr_decode_req(struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp)
+/**
+ * svc_rdma_xdr_decode_req - Parse incoming RPC-over-RDMA header
+ * @rq_arg: Receive buffer
+ *
+ * On entry, xdr->head[0].iov_base points to first byte in the
+ * RPC-over-RDMA header.
+ *
+ * On successful exit, head[0] points to first byte past the
+ * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
+ * The length of the RPC-over-RDMA header is returned.
+ */
+int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
{
+ struct rpcrdma_msg *rmsgp;
__be32 *va, *vaend;
unsigned int len;
u32 hdr_len;
/* Verify that there's enough bytes for header + something */
- if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_ERR) {
+ if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) {
dprintk("svcrdma: header too short = %d\n",
- rqstp->rq_arg.len);
+ rq_arg->len);
return -EINVAL;
}
+ rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base;
if (rmsgp->rm_vers != rpcrdma_version) {
dprintk("%s: bad version %u\n", __func__,
be32_to_cpu(rmsgp->rm_vers));
@@ -189,10 +202,10 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp)
be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
- rqstp->rq_arg.head[0].iov_base = va;
+ rq_arg->head[0].iov_base = va;
len = (u32)((unsigned long)va - (unsigned long)rmsgp);
- rqstp->rq_arg.head[0].iov_len -= len;
- if (len > rqstp->rq_arg.len)
+ rq_arg->head[0].iov_len -= len;
+ if (len > rq_arg->len)
return -EINVAL;
return len;
default:
@@ -205,7 +218,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp)
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
- vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
+ vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len);
va = decode_read_list(va, vaend);
if (!va) {
dprintk("svcrdma: failed to decode read list\n");
@@ -222,10 +235,9 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp)
return -EINVAL;
}
- rqstp->rq_arg.head[0].iov_base = va;
+ rq_arg->head[0].iov_base = va;
hdr_len = (unsigned long)va - (unsigned long)rmsgp;
- rqstp->rq_arg.head[0].iov_len -= hdr_len;
-
+ rq_arg->head[0].iov_len -= hdr_len;
return hdr_len;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index fbe7444..2c25606 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -447,10 +447,8 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
head->arg.len = rqstp->rq_arg.len;
head->arg.buflen = rqstp->rq_arg.buflen;
- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- position = be32_to_cpu(ch->rc_position);
-
/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+ position = be32_to_cpu(ch->rc_position);
if (position == 0) {
head->arg.pages = &head->pages[0];
page_offset = head->byte_len;
@@ -488,7 +486,7 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
if (page_offset & 3) {
u32 pad = 4 - (page_offset & 3);
- head->arg.page_len += pad;
+ head->arg.tail[0].iov_len += pad;
head->arg.len += pad;
head->arg.buflen += pad;
page_offset += pad;
@@ -510,11 +508,10 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
return ret;
}
-static int rdma_read_complete(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head)
+static void rdma_read_complete(struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head)
{
int page_no;
- int ret;
/* Copy RPC pages */
for (page_no = 0; page_no < head->count; page_no++) {
@@ -550,23 +547,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_arg.tail[0] = head->arg.tail[0];
rqstp->rq_arg.len = head->arg.len;
rqstp->rq_arg.buflen = head->arg.buflen;
-
- /* Free the context */
- svc_rdma_put_context(head, 0);
-
- /* XXX: What should this be? */
- rqstp->rq_prot = IPPROTO_MAX;
- svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
-
- ret = rqstp->rq_arg.head[0].iov_len
- + rqstp->rq_arg.page_len
- + rqstp->rq_arg.tail[0].iov_len;
- dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, "
- "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n",
- ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
- rqstp->rq_arg.head[0].iov_len);
-
- return ret;
}
/* By convention, backchannel calls arrive via rdma_msg type
@@ -624,7 +604,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
dto_q);
list_del_init(&ctxt->dto_q);
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
- return rdma_read_complete(rqstp, ctxt);
+ rdma_read_complete(rqstp, ctxt);
+ goto complete;
} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
@@ -655,7 +636,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
/* Decode the RDMA header. */
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
- ret = svc_rdma_xdr_decode_req(rmsgp, rqstp);
+ ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
if (ret < 0)
goto out_err;
if (ret == 0)
@@ -682,6 +663,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
return 0;
}
+complete:
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 4f1b1c4..54d53330 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -463,25 +463,21 @@ static int send_reply(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct page *page,
struct rpcrdma_msg *rdma_resp,
- struct svc_rdma_op_ctxt *ctxt,
struct svc_rdma_req_map *vec,
int byte_count)
{
+ struct svc_rdma_op_ctxt *ctxt;
struct ib_send_wr send_wr;
u32 xdr_off;
int sge_no;
int sge_bytes;
int page_no;
int pages;
- int ret;
-
- ret = svc_rdma_repost_recv(rdma, GFP_KERNEL);
- if (ret) {
- svc_rdma_put_context(ctxt, 0);
- return -ENOTCONN;
- }
+ int ret = -EIO;
/* Prepare the context */
+ ctxt = svc_rdma_get_context(rdma);
+ ctxt->direction = DMA_TO_DEVICE;
ctxt->pages[0] = page;
ctxt->count = 1;
@@ -565,8 +561,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
- pr_err("svcrdma: failed to send reply, rc=%d\n", ret);
- return -EIO;
+ return ret;
}
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -585,7 +580,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
int ret;
int inline_bytes;
struct page *res_page;
- struct svc_rdma_op_ctxt *ctxt;
struct svc_rdma_req_map *vec;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
@@ -598,8 +592,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
/* Build an req vec for the XDR */
- ctxt = svc_rdma_get_context(rdma);
- ctxt->direction = DMA_TO_DEVICE;
vec = svc_rdma_get_req_map(rdma);
ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
if (ret)
@@ -635,7 +627,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
inline_bytes -= ret;
}
- ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
+ /* Post a fresh Receive buffer _before_ sending the reply */
+ ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
+ if (ret)
+ goto err1;
+
+ ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
inline_bytes);
if (ret < 0)
goto err1;
@@ -648,7 +645,8 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
put_page(res_page);
err0:
svc_rdma_put_req_map(rdma, vec);
- svc_rdma_put_context(ctxt, 0);
+ pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n",
+ ret);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
return -ENOTCONN;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9066896..dd94401 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -789,7 +789,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
int ret;
dprintk("svcrdma: Creating RDMA socket\n");
- if (sa->sa_family != AF_INET) {
+ if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
return ERR_PTR(-EAFNOSUPPORT);
}
@@ -805,6 +805,16 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
goto err0;
}
+ /* Allow both IPv4 and IPv6 sockets to bind a single port
+ * at the same time.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ ret = rdma_set_afonly(listen_id, 1);
+ if (ret) {
+ dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret);
+ goto err1;
+ }
+#endif
ret = rdma_bind_addr(listen_id, sa);
if (ret) {
dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
@@ -1073,7 +1083,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
/* Post receive buffers */
- for (i = 0; i < newxprt->sc_rq_depth; i++) {
+ for (i = 0; i < newxprt->sc_max_requests; i++) {
ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
if (ret) {
dprintk("svcrdma: failure posting receive buffers\n");
@@ -1170,6 +1180,9 @@ static void __svc_rdma_free(struct work_struct *work)
dprintk("svcrdma: %s(%p)\n", __func__, rdma);
+ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
+ ib_drain_qp(rdma->sc_qp);
+
/* We should only be called from kref_put */
if (atomic_read(&xprt->xpt_ref.refcount) != 0)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",