From df66e75395c839c3a373bae897dbb1248f741b45 Mon Sep 17 00:00:00 2001 From: Harshula Jayasuriya Date: Tue, 23 Jul 2013 14:05:14 +1000 Subject: nfsd: nfs4_file_get_access: need to be more careful with O_RDWR If fi_fds = {non-NULL, NULL, non-NULL} and oflag = O_WRONLY the WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])) doesn't trigger when it should. Signed-off-by: Harshula Jayasuriya Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 280acef..1cb6211 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -282,19 +282,14 @@ static unsigned int file_hashval(struct inode *ino) static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; -static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) -{ - WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); - atomic_inc(&fp->fi_access[oflag]); -} - static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) { + WARN_ON_ONCE(!fp->fi_fds[oflag]); if (oflag == O_RDWR) { - __nfs4_file_get_access(fp, O_RDONLY); - __nfs4_file_get_access(fp, O_WRONLY); + atomic_inc(&fp->fi_access[O_RDONLY]); + atomic_inc(&fp->fi_access[O_WRONLY]); } else - __nfs4_file_get_access(fp, oflag); + atomic_inc(&fp->fi_access[oflag]); } static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) -- cgit v0.10.2 From b1948a641daefe8d128749f3d419ed24d529a8ed Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 26 Jul 2013 16:57:20 -0400 Subject: nfsd4: fix setlease error return This actually makes a difference in the 4.1 case, since we use the status to decide what reason to give the client for the delegation refusal (see nfsd4_open_deleg_none_ext), and in theory a client might choose suboptimal behavior if we give the wrong answer. Reported-by: Jeff Layton Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1cb6211..1852f53 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3028,7 +3028,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp) if (status) { list_del_init(&dp->dl_perclnt); locks_free_lock(fl); - return -ENOMEM; + return status; } fp->fi_lease = fl; fp->fi_deleg_file = get_file(fl->fl_file); -- cgit v0.10.2 From 2f74f972d4cc7d83408ea0c32d424edcb44887bf Mon Sep 17 00:00:00 2001 From: Harshula Jayasuriya Date: Fri, 16 Aug 2013 03:46:40 +1000 Subject: sunrpc: prepare NFS for 2038 1) The kernel sunrpc code needs to handle seconds since epoch greater than 2147483647. This means functions that parse time as an int need to handle it as time_t. 2) The kernel changes must be accompanied by userspace changes in nfs-utils. Signed-off-by: Harshula Jayasuriya Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 6ce690d..437ddb6 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -264,12 +264,30 @@ static inline int get_uint(char **bpp, unsigned int *anint) return 0; } +static inline int get_time(char **bpp, time_t *time) +{ + char buf[50]; + long long ll; + int len = qword_get(bpp, buf, sizeof(buf)); + + if (len < 0) + return -EINVAL; + if (len == 0) + return -ENOENT; + + if (kstrtoll(buf, 0, &ll)) + return -EINVAL; + + *time = (time_t)ll; + return 0; +} + static inline time_t get_expiry(char **bpp) { - int rv; + time_t rv; struct timespec boot; - if (get_int(bpp, &rv)) + if (get_time(bpp, &rv)) return 0; if (rv < 0) return 0; -- cgit v0.10.2 From 3477565e6a73da7bb50fce6ac718b31eddb37fbb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 23 Aug 2013 17:55:18 -0400 Subject: Revert "nfsd: nfs4_file_get_access: need to be more careful with O_RDWR" This reverts commit df66e75395c839c3a373bae897dbb1248f741b45. 
nfsd4_lock can get a read-only or write-only reference when only a read-write open is available. This is normal. Cc: Harshula Jayasuriya Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5e609b1..eb9cf81 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -282,14 +282,19 @@ static unsigned int file_hashval(struct inode *ino) static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; +static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) +{ + WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); + atomic_inc(&fp->fi_access[oflag]); +} + static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) { - WARN_ON_ONCE(!fp->fi_fds[oflag]); if (oflag == O_RDWR) { - atomic_inc(&fp->fi_access[O_RDONLY]); - atomic_inc(&fp->fi_access[O_WRONLY]); + __nfs4_file_get_access(fp, O_RDONLY); + __nfs4_file_get_access(fp, O_WRONLY); } else - atomic_inc(&fp->fi_access[oflag]); + __nfs4_file_get_access(fp, oflag); } static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) -- cgit v0.10.2 From bf7bd3e98be5c74813bee6ad496139fb0a011b3b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 15 Aug 2013 16:55:26 -0400 Subject: nfsd4: fix leak of inode reference on delegation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a regression from 68a3396178e6688ad7367202cdf0af8ed03c8727 "nfsd4: shut down more of delegation earlier". After that commit, nfs4_set_delegation() failures result in nfs4_put_delegation being called, but nfs4_put_delegation doesn't free the nfs4_file that has already been set by alloc_init_deleg(). This can result in an oops on later unmounting the exported filesystem. 
Note also that by delaying the fi_had_conflict check we're able to return a better error (hence give 4.1 clients a better idea why the delegation failed; though note CONFLICT isn't an exact match here, as that's supposed to indicate a current conflict, but all we know here is that there was one recently). Reported-by: Toralf Förster Tested-by: Toralf Förster Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index eb9cf81..0874998 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -368,11 +368,8 @@ static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; - struct nfs4_file *fp = stp->st_file; dprintk("NFSD alloc_init_deleg\n"); - if (fp->fi_had_conflict) - return NULL; if (num_delegations > max_delegations) return NULL; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); @@ -389,8 +386,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - get_nfs4_file(fp); - dp->dl_file = fp; + dp->dl_file = NULL; dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); dp->dl_time = 0; @@ -3044,22 +3040,35 @@ static int nfs4_setlease(struct nfs4_delegation *dp) return 0; } -static int nfs4_set_delegation(struct nfs4_delegation *dp) +static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) { - struct nfs4_file *fp = dp->dl_file; + int status; - if (!fp->fi_lease) - return nfs4_setlease(dp); + if (fp->fi_had_conflict) + return -EAGAIN; + get_nfs4_file(fp); + dp->dl_file = fp; + if (!fp->fi_lease) { + status = nfs4_setlease(dp); + if (status) + goto out_free; + return 0; + } spin_lock(&recall_lock); if (fp->fi_had_conflict) { spin_unlock(&recall_lock); - return -EAGAIN; + status = -EAGAIN; + goto out_free; } atomic_inc(&fp->fi_delegees); 
list_add(&dp->dl_perfile, &fp->fi_delegations); spin_unlock(&recall_lock); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); return 0; +out_free: + put_nfs4_file(fp); + dp->dl_file = fp; + return status; } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -3134,7 +3143,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); if (dp == NULL) goto out_no_deleg; - status = nfs4_set_delegation(dp); + status = nfs4_set_delegation(dp, stp->st_file); if (status) goto out_free; -- cgit v0.10.2 From 248f807b479145194a83c5270440b3f51c1836d7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 28 Aug 2013 08:49:45 -0400 Subject: nfsd4: nfsd4_create_clid_dir prints uninitialized data Take the easy way out and just remove the printk. Reported-by: David Howells diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 105a3b0..e0a65a9 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -173,8 +173,6 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) int status; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; if (!nn->rec_file) -- cgit v0.10.2 From 11d2a1618e377236facdd404113992bde1083914 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 30 Aug 2013 16:50:06 -0400 Subject: svcrpc: remove unused rq_resused I forgot to remove this in afc59400d6c65bad66d4ad0b2daf879cbff8e23e "nfsd4: cleanup: replace rq_resused count by rq_next_page pointer". Signed-off-by: J. 
Bruce Fields diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1f0216b..6eecfc2 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -243,7 +243,6 @@ struct svc_rqst { struct xdr_buf rq_res; struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * *rq_respages; /* points into rq_pages */ - int rq_resused; /* number of pages used for result */ struct page * *rq_next_page; /* next reply page to use */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ -- cgit v0.10.2 From 778e512bb1d3315c6b55832248cd30c566c081d7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 21 Aug 2013 10:32:52 -0400 Subject: rpc: clean up decoding of gssproxy linux creds We can use the normal coding infrastructure here. Two minor behavior changes: - we're assuming no wasted space at the end of the linux cred. That seems to match gss-proxy's behavior, and I can't see why it would need to do differently in the future. - NGROUPS_MAX check added: note groups_alloc doesn't do this, this is the caller's responsibility. Tested-by: Simo Sorce Signed-off-by: J. 
Bruce Fields diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 3c85d1c..f5067b2 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -166,14 +166,14 @@ static int dummy_dec_opt_array(struct xdr_stream *xdr, return 0; } -static int get_s32(void **p, void *max, s32 *res) +static int get_s32(struct xdr_stream *xdr, s32 *res) { - void *base = *p; - void *next = (void *)((char *)base + sizeof(s32)); - if (unlikely(next > max || next < base)) + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (!p) return -EINVAL; - memcpy(res, base, sizeof(s32)); - *p = next; + memcpy(res, p, sizeof(s32)); return 0; } @@ -182,7 +182,6 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, { u32 length; __be32 *p; - void *q, *end; s32 tmp; int N, i, err; @@ -192,33 +191,28 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, length = be32_to_cpup(p); - /* FIXME: we do not want to use the scratch buffer for this one - * may need to use functions that allows us to access an io vector - * directly */ - p = xdr_inline_decode(xdr, length); - if (unlikely(p == NULL)) + if (length > (3 + NGROUPS_MAX) * sizeof(u32)) return -ENOSPC; - q = p; - end = q + length; - /* uid */ - err = get_s32(&q, end, &tmp); + err = get_s32(xdr, &tmp); if (err) return err; creds->cr_uid = make_kuid(&init_user_ns, tmp); /* gid */ - err = get_s32(&q, end, &tmp); + err = get_s32(xdr, &tmp); if (err) return err; creds->cr_gid = make_kgid(&init_user_ns, tmp); /* number of additional gid's */ - err = get_s32(&q, end, &tmp); + err = get_s32(xdr, &tmp); if (err) return err; N = tmp; + if ((3 + N) * sizeof(u32) != length) + return -EINVAL; creds->cr_group_info = groups_alloc(N); if (creds->cr_group_info == NULL) return -ENOMEM; @@ -226,7 +220,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, /* gid's */ for (i = 0; i < N; i++) { kgid_t kgid; - err = get_s32(&q, end, &tmp); + err = get_s32(xdr, &tmp); if (err) goto 
out_free_groups; err = -EINVAL; -- cgit v0.10.2 From 6a36978e6931e6601be586eb313375335f2cfaa3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 23 Aug 2013 11:17:53 -0400 Subject: rpc: comment on linux_cred encoding, treat all as unsigned The encoding of linux creds is a bit confusing. Also: I think in practice it doesn't really matter whether we treat any of these things as signed or unsigned, but unsigned seems more straightforward: uid_t/gid_t are unsigned and it simplifies the ngroups overflow check. Tested-by: Simo Sorce Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index f5067b2..3c19c7d 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -166,14 +166,15 @@ static int dummy_dec_opt_array(struct xdr_stream *xdr, return 0; } -static int get_s32(struct xdr_stream *xdr, s32 *res) +static int get_host_u32(struct xdr_stream *xdr, u32 *res) { __be32 *p; p = xdr_inline_decode(xdr, 4); if (!p) return -EINVAL; - memcpy(res, p, sizeof(s32)); + /* Contents of linux creds are all host-endian: */ + memcpy(res, p, sizeof(u32)); return 0; } @@ -182,8 +183,9 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, { u32 length; __be32 *p; - s32 tmp; - int N, i, err; + u32 tmp; + u32 N; + int i, err; p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) @@ -195,19 +197,19 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, return -ENOSPC; /* uid */ - err = get_s32(xdr, &tmp); + err = get_host_u32(xdr, &tmp); if (err) return err; creds->cr_uid = make_kuid(&init_user_ns, tmp); /* gid */ - err = get_s32(xdr, &tmp); + err = get_host_u32(xdr, &tmp); if (err) return err; creds->cr_gid = make_kgid(&init_user_ns, tmp); /* number of additional gid's */ - err = get_s32(xdr, &tmp); + err = get_host_u32(xdr, &tmp); if (err) return err; N = tmp; @@ -220,7 +222,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, /* gid's */ for (i = 0; i < N; i++) { kgid_t 
kgid; - err = get_s32(xdr, &tmp); + err = get_host_u32(xdr, &tmp); if (err) goto out_free_groups; err = -EINVAL; -- cgit v0.10.2 From 9dfd87da1aeb0fd364167ad199f40fe96a6a87be Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 20 Aug 2013 18:13:27 -0400 Subject: rpc: fix huge kmalloc's in gss-proxy The reply to a gssproxy can include up to NGROUPS_MAX gid's, which will take up more than a page. We therefore need to allocate an array of pages to hold the reply instead of trying to allocate a single huge buffer. Tested-by: Simo Sorce Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index af7ffd4..be95af3 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -213,6 +213,30 @@ static int gssp_call(struct net *net, struct rpc_message *msg) return status; } +static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg) +{ + int i; + + for (i = 0; i < arg->npages && arg->pages[i]; i++) + __free_page(arg->pages[i]); +} + +static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) +{ + int i; + + arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE); + arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL); + + for (i=0; i < arg->npages; i++) { + arg->pages[i] = alloc_page(GFP_KERNEL); + if (arg->pages[i] == NULL) { + gssp_free_receive_pages(arg); + return -ENOMEM; + } + } + return 0; +} /* * Public functions @@ -261,10 +285,16 @@ int gssp_accept_sec_context_upcall(struct net *net, arg.context_handle = &ctxh; res.output_token->len = GSSX_max_output_token_sz; + ret = gssp_alloc_receive_pages(&arg); + if (ret) + return ret; + /* use nfs/ for targ_name ? 
*/ ret = gssp_call(net, &msg); + gssp_free_receive_pages(&arg); + /* we need to fetch all data even in case of error so * that we can free special strctures is they have been allocated */ data->major_status = res.status.major_status; diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 3c19c7d..f0f78c5 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -780,6 +780,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req, /* arg->options */ err = dummy_enc_opt_array(xdr, &arg->options); + xdr_inline_pages(&req->rq_rcv_buf, + PAGE_SIZE/2 /* pretty arbitrary */, + arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE); done: if (err) dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err); diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h index 1c98b27..685a688 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.h +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h @@ -147,6 +147,8 @@ struct gssx_arg_accept_sec_context { struct gssx_cb *input_cb; u32 ret_deleg_cred; struct gssx_option_array options; + struct page **pages; + unsigned int npages; }; struct gssx_res_accept_sec_context { @@ -240,7 +242,8 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, 2 * GSSX_max_princ_sz + \ 8 + 8 + 4 + 4 + 4) #define GSSX_max_output_token_sz 1024 -#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4) +/* grouplist not included; we allocate separate pages for that: */ +#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */) #define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \ GSSX_default_ctx_sz + \ GSSX_max_output_token_sz + \ -- cgit v0.10.2 From d4a516560fc96a9d486a9939bcb567e3fdce8f49 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 23 Aug 2013 17:26:28 -0400 Subject: rpc: let xdr layer allocate gssproxy receive pages In theory the linux cred in a gssproxy reply can include up to NGROUPS_MAX gids, i.e. 256K of data. 
In the common case we expect it to be shorter. So do as the nfsv3 ACL code does and let the xdr code allocate the pages as they come in, instead of allocating a lot of pages that won't typically be used. Tested-by: Simo Sorce Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index be95af3..f1eb0d1 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -223,18 +223,14 @@ static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg) static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) { - int i; - arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE); arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL); - - for (i=0; i < arg->npages; i++) { - arg->pages[i] = alloc_page(GFP_KERNEL); - if (arg->pages[i] == NULL) { - gssp_free_receive_pages(arg); - return -ENOMEM; - } - } + /* + * XXX: actual pages are allocated by xdr layer in + * xdr_partial_copy_from_skb. + */ + if (!arg->pages) + return -ENOMEM; return 0; } -- cgit v0.10.2