From 378b7d37f90399b7c34373a5925450529afb917b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 25 May 2010 11:57:56 -0400 Subject: nfsd4: remove extra put() on callback errors Since rpc_call_async() guarantees that the release method will be called even on failure, this put is wrong. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index eb78e7e..8a21db22 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -752,7 +752,6 @@ static void _nfsd4_cb_recall(struct nfs4_delegation *dp) .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], .rpc_cred = callback_cred }; - int status; if (clnt == NULL) return; /* Client is shutting down; give up. */ @@ -760,10 +759,7 @@ static void _nfsd4_cb_recall(struct nfs4_delegation *dp) args->args_op = dp; msg.rpc_argp = args; dp->dl_retries = 1; - status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, - &nfsd4_cb_recall_ops, dp); - if (status) - nfs4_put_delegation(dp); + rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp); } void nfsd4_do_callback_rpc(struct work_struct *w) -- cgit v0.10.2 From 172c85dd5764d2766bfd68621e5b54e85c4a6cfa Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 30 May 2010 11:53:12 -0400 Subject: nfsd4: treat more recall errors as failures If a recall fails for some unexpected reason, instead of ignoring it and treating it like a success, it's safer to treat it as a failure, preventing further delgation grants and returning CB_PATH_DOWN. Also put put switches in a (two me) more logical order, with normal case first. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 8a21db22..ae91792 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -667,7 +667,14 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) } switch (task->tk_status) { - case -EIO: + case 0: + return; + case -EBADHANDLE: + case -NFS4ERR_BAD_STATEID: + /* Race: client probably got cb_recall + * before open reply granting delegation */ + break; + default: /* Network partition? */ atomic_set(&clp->cl_cb_set, 0); warn_no_callback_path(clp, task->tk_status); @@ -676,14 +683,6 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) nfsd4_cb_recall(dp); return; } - case -EBADHANDLE: - case -NFS4ERR_BAD_STATEID: - /* Race: client probably got cb_recall - * before open reply granting delegation */ - break; - default: - /* success, or error we can't handle */ - return; } if (dp->dl_retries--) { rpc_delay(task, 2*HZ); -- cgit v0.10.2 From 24a0111e405abeb74701ce3b7b665365c27de19e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 May 2010 20:01:35 -0400 Subject: nfsd4: fix use of op_share_access NFSv4.1 adds additional flags to the share_access argument of the open call. These flags need to be masked out in some of the existing code, but current code does that inconsistently. Tested-by: Michael Groshans Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 12f7109..1176708 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2255,6 +2255,13 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid) return NULL; } +int share_access_to_flags(u32 share_access) +{ + share_access &= ~NFS4_SHARE_WANT_MASK; + + return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; +} + static __be32 nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_delegation **dp) @@ -2265,8 +2272,7 @@ nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, *dp = find_delegation_file(fp, &open->op_delegate_stateid); if (*dp == NULL) goto out; - flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ? - RD_STATE : WR_STATE; + flags = share_access_to_flags(open->op_share_access); status = nfs4_check_delegmode(*dp, flags); if (status) *dp = NULL; @@ -2358,6 +2364,7 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta struct file *filp = stp->st_vfs_file; struct inode *inode = filp->f_path.dentry->d_inode; unsigned int share_access, new_writer; + u32 op_share_access; __be32 status; set_access(&share_access, stp->st_access_bmap); @@ -2380,8 +2387,9 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta return status; } /* remember the open */ - filp->f_mode |= open->op_share_access; - __set_bit(open->op_share_access, &stp->st_access_bmap); + op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; + filp->f_mode |= op_share_access; + __set_bit(op_share_access, &stp->st_access_bmap); __set_bit(open->op_share_deny, &stp->st_deny_bmap); return nfs_ok; -- cgit v0.10.2 From 68a4b48ce6cb73a9643bae6dd3e0f062e3fd8ef7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 27 May 2010 09:30:39 -0400 Subject: nfsd4: don't bother storing callback reply tag We don't use this, and probably never will. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ae91792..c8dd03c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -143,8 +143,6 @@ struct nfs4_cb_compound_hdr { u32 minorversion; /* res */ int status; - u32 taglen; - char *tag; }; static struct { @@ -293,13 +291,14 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, static int decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){ __be32 *p; + u32 taglen; READ_BUF(8); READ32(hdr->status); - READ32(hdr->taglen); - READ_BUF(hdr->taglen + 4); - hdr->tag = (char *)p; - p += XDR_QUADLEN(hdr->taglen); + /* We've got no use for the tag; ignore it: */ + READ32(taglen); + READ_BUF(taglen + 4); + p += XDR_QUADLEN(taglen); READ32(hdr->nops); return 0; } -- cgit v0.10.2 From 76407f76e0f71428f3c31faff004bff87fea51ba Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 22 Jun 2010 14:10:14 -0400 Subject: nfsd4; fix session reference count leak Note the session has to be put() here regardless of what happens to the client. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1176708..5a69ee6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -714,7 +714,6 @@ release_session_client(struct nfsd4_session *session) } else renew_client_locked(clp); spin_unlock(&client_lock); - nfsd4_put_session(session); } /* must be called under the client_lock */ diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ac17a70..835924f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3325,6 +3325,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo } /* Renew the clientid on success and on replay */ release_session_client(cs->session); + nfsd4_put_session(cs->session); } return 1; } -- cgit v0.10.2 From 4731030d58a146630f5e8a0519661a5344a60f45 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 22 Jun 2010 16:17:12 -0400 Subject: nfsd4: translate memory errors to delay, not serverfault If the server is out of memory is better for clients to back off and retry than to just error out. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5a69ee6..603076f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -457,7 +457,7 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) spin_unlock(&nfsd_drc_lock); if (fchan->maxreqs == 0) - return nfserr_serverfault; + return nfserr_jukebox; fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; return 0; @@ -542,7 +542,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) + sizeof(struct nfsd4_session) > PAGE_SIZE); - status = nfserr_serverfault; + status = nfserr_jukebox; /* allocate struct nfsd4_session and slot table pointers in one piece */ slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); @@ -1219,7 +1219,7 @@ out_new: /* Normal case */ new = create_client(exid->clname, dname, rqstp, &verf); if (new == NULL) { - status = nfserr_serverfault; + status = nfserr_jukebox; goto out; } -- cgit v0.10.2 From 9303bbd3de3b25061de11e4d8c891fa9592fad8c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 25 May 2010 09:50:23 +0300 Subject: nfsd: nfs4callback encode_stateid helper function To be used also for the pnfs cb_layoutrecall callback Signed-off-by: Benny Halevy [nfsd4: fix cb_recall encoding] "nfsd: nfs4callback encode_stateid helper function" forgot to reserve more space after return from the new helper. Reported-by: Michael Groshans Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c8dd03c..874a56a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -203,6 +203,16 @@ nfs_cb_stat_to_errno(int stat) */ static void +encode_stateid(struct xdr_stream *xdr, stateid_t *sid) +{ + __be32 *p; + + RESERVE_SPACE(sizeof(stateid_t)); + WRITE32(sid->si_generation); + WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); +} + +static void encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) { __be32 * p; @@ -227,10 +237,10 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, __be32 *p; int len = dp->dl_fh.fh_size; - RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len); + RESERVE_SPACE(4); WRITE32(OP_CB_RECALL); - WRITE32(dp->dl_stateid.si_generation); - WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t)); + encode_stateid(xdr, &dp->dl_stateid); + RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2)); WRITE32(0); /* truncate optimization not implemented */ WRITE32(len); WRITEMEM(&dp->dl_fh.fh_base, len); -- cgit v0.10.2 From ec8acac84aea4245ae2cc999d56a68f0302cc847 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 16 Jun 2010 13:03:04 -0400 Subject: nfsd4: remove some debugging code This is overkill. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 603076f..182448f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -591,10 +591,8 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) dump_sessionid(__func__, sessionid); idx = hash_sessionid(sessionid); - dprintk("%s: idx is %d\n", __func__, idx); /* Search in the appropriate list */ list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { - dump_sessionid("list traversal", &elem->se_sessionid); if (!memcmp(elem->se_sessionid.data, sessionid->data, NFS4_MAX_SESSIONID_LEN)) { return elem; -- cgit v0.10.2 From ac94bf582529343bb7f354d0eef6dc4e566bbbd5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 31 May 2010 19:06:39 -0400 Subject: nfsd4: fix deleg leak on callback error Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 874a56a..a468632 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -761,8 +761,10 @@ static void _nfsd4_cb_recall(struct nfs4_delegation *dp) .rpc_cred = callback_cred }; - if (clnt == NULL) + if (clnt == NULL) { + nfs4_put_delegation(dp); return; /* Client is shutting down; give up. */ + } args->args_op = dp; msg.rpc_argp = args; -- cgit v0.10.2 From cba9ba4b902270c22f8b9c5149a284216b633fc1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 1 Jun 2010 11:21:40 -0400 Subject: nfsd4: fix delegation recall race use-after-free When the rarely-used callback-connection-changing setclientid occurs simultaneously with a delegation recall, we rerun the recall by requeueing it on a workqueue. But we also need to take a reference on the delegation in that case, since the delegation held by the rpc itself will be released by the rpc_release callback. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a468632..1e6497e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -689,6 +689,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) warn_no_callback_path(clp, task->tk_status); if (current_rpc_client != task->tk_client) { /* queue a callback on the new connection: */ + atomic_inc(&dp->dl_count); nfsd4_cb_recall(dp); return; } -- cgit v0.10.2 From 8eab945c5616fc984e97b922d6a2559be93f39a1 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 1 Jul 2010 18:05:56 +0300 Subject: sunrpc: make the cache cleaner workqueue deferrable This patch makes the cache_cleaner workqueue deferrable, to prevent unnecessary system wake-ups, which is very important for embedded battery-powered devices. do_cache_clean() is called every 30 seconds at the moment, and often makes the system wake up from its power-save sleep state. With this change, when the workqueue uses a deferrable timer, the do_cache_clean() invocation will be delayed and combined with the closest "real" wake-up. This improves the power consumption situation. Note, I tried to create a DECLARE_DELAYED_WORK_DEFERRABLE() helper macro, similar to DECLARE_DELAYED_WORK(), but failed because of the way the timer wheel core stores the deferrable flag (it is the LSBit in the time->base pointer). My attempt to define a static variable with this bit set ended up with the "initializer element is not constant" error. Thus, I have to use run-time initialization, so I created a new cache_initialize() function which is called once when sunrpc is being initialized. Signed-off-by: Artem Bityutskiy Signed-off-by: J. Bruce Fields diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 6f52b4d..7bf3e84 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -192,6 +192,7 @@ extern int cache_check(struct cache_detail *detail, extern void cache_flush(void); extern void cache_purge(struct cache_detail *detail); #define NEVER (0x7FFFFFFF) +extern void __init cache_initialize(void); extern int cache_register(struct cache_detail *cd); extern void cache_unregister(struct cache_detail *cd); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 58de76c..939d048 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -320,7 +320,7 @@ static struct cache_detail *current_detail; static int current_index; static void do_cache_clean(struct work_struct *work); -static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); +static struct delayed_work cache_cleaner; static void sunrpc_init_cache_detail(struct cache_detail *cd) { @@ -1504,6 +1504,11 @@ static int create_cache_proc_entries(struct cache_detail *cd) } #endif +void __init cache_initialize(void) +{ + INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); +} + int cache_register(struct cache_detail *cd) { int ret; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f438347..c52b184 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -43,6 +43,7 @@ init_sunrpc(void) #ifdef CONFIG_PROC_FS rpc_proc_init(); #endif + cache_initialize(); cache_register(&ip_map_cache); cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ -- cgit v0.10.2 From 6a85d6c76962db769bb2f2cb11b17b16f32c4158 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 6 Jul 2010 12:39:12 -0400 Subject: nfsd4: comment nitpick Reported-by: "Madan, Anshul" Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ebbf3b6..e3611b5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -605,7 +605,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac return error; } -#endif /* defined(CONFIG_NFS_V4) */ +#endif /* defined(CONFIG_NFSD_V4) */ #ifdef CONFIG_NFSD_V3 /* -- cgit v0.10.2 From 43a9aa64a2f4330a9cb59aaf5c5636566bce067c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Jul 2010 16:53:34 -0400 Subject: NFSD: Fill in WCC data for REMOVE, RMDIR, MKNOD, and MKDIR Some well-known NFSv3 clients drop their directory entry caches when they receive replies with no WCC data. Without this data, they employ extra READ, LOOKUP, and GETATTR requests to ensure their directory entry caches are up to date, causing performance to suffer needlessly. In order to return WCC data, our server has to have both the pre-op and the post-op attribute data on hand when a reply is XDR encoded. The pre-op data is filled in when the incoming fh is locked, and the post-op data is filled in when the fh is unlocked. Unfortunately, for REMOVE, RMDIR, MKNOD, and MKDIR, the directory fh is not unlocked until well after the reply has been XDR encoded. This means that encode_wcc_data() does not have wcc_data for the parent directory, so none is returned to the client after these operations complete. By unlocking the parent directory fh immediately after the internal operations for each NFS procedure is complete, the post-op data is filled in before XDR encoding starts, so it can be returned to the client properly. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 3d68f45..9ae9331 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -271,7 +271,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, fh_init(&resp->fh, NFS3_FHSIZE); nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &argp->attrs, S_IFDIR, 0, &resp->fh); - + fh_unlock(&resp->dirfh); RETURN_STATUS(nfserr); } @@ -327,7 +327,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp, type = nfs3_ftypes[argp->ftype]; nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &argp->attrs, type, rdev, &resp->fh); - + fh_unlock(&resp->dirfh); RETURN_STATUS(nfserr); } @@ -348,6 +348,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, /* Unlink. -S_IFDIR means file must not be a directory */ fh_copy(&resp->fh, &argp->fh); nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); + fh_unlock(&resp->fh); RETURN_STATUS(nfserr); } @@ -367,6 +368,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, fh_copy(&resp->fh, &argp->fh); nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); + fh_unlock(&resp->fh); RETURN_STATUS(nfserr); } -- cgit v0.10.2 From 55b13354d789dcf0b85db6d86fc3a9e57dca02c1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 19 Jul 2010 16:38:24 -0400 Subject: nfsd: remove unused assignment from nfsd_link Trivial cleanup, since "dest" is never used. Reported-by: Anshul Madan Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e3611b5..5ca984b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1632,7 +1632,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *name, int len, struct svc_fh *tfhp) { struct dentry *ddir, *dnew, *dold; - struct inode *dirp, *dest; + struct inode *dirp; __be32 err; int host_err; @@ -1660,7 +1660,6 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, goto out_nfserr; dold = tfhp->fh_dentry; - dest = dold->d_inode; host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); if (host_err) { -- cgit v0.10.2 From 4ad9a344be2291b1e594a4a5aee25c5a5df34a97 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 19 Jul 2010 16:50:04 -0400 Subject: nfsd4: fix v4 state shutdown error paths If someone tries to shut down the laundry_wq while it isn't up it'll cause an oops. This can happen because write_ports can create a nfsd_svc before we really start the nfs server, and we may fail before the server is ever started. Also make sure state is shutdown on error paths in nfsd_svc(). Use a common global nfsd_up flag instead of nfs4_init, and create common helper functions for nfsd start/shutdown, as there will be other work that we want done only when we the number of nfsd threads transitions between zero and nonzero. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 182448f..9cc3b78 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -51,7 +51,6 @@ static time_t boot_time; static u32 current_ownerid = 1; static u32 current_fileid = 1; static u32 current_delegid = 1; -static u32 nfs4_init; static stateid_t zerostateid; /* bits all 0 */ static stateid_t onestateid; /* bits all 1 */ static u64 current_sessionid = 1; @@ -4071,16 +4070,8 @@ out_free_laundry: int nfs4_state_start(void) { - int ret; - - if (nfs4_init) - return 0; nfsd4_load_reboot_recovery_data(); - ret = __nfs4_state_start(); - if (ret) - return ret; - nfs4_init = 1; - return 0; + return __nfs4_state_start(); } static void @@ -4115,7 +4106,6 @@ __nfs4_state_shutdown(void) } nfsd4_shutdown_recdir(); - nfs4_init = 0; } void diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 06b2a26..d7a4d7b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -180,6 +180,31 @@ int nfsd_nrthreads(void) return rv; } +static bool nfsd_up = false; + +static int nfsd_startup(unsigned short port, int nrservs) +{ + int ret; + + ret = nfs4_state_start(); + nfsd_up = true; + return ret; +} + +static void nfsd_shutdown(void) +{ + /* + * write_ports can create the server without actually starting + * any threads--if we get shut down before any threads are + * started, then nfsd_last_thread will be run before any of this + * other initialization has been done. + */ + if (!nfsd_up) + return; + nfs4_state_shutdown(); + nfsd_up = false; +} + static void nfsd_last_thread(struct svc_serv *serv) { /* When last nfsd thread exits we need to do some clean-up */ @@ -188,7 +213,7 @@ static void nfsd_last_thread(struct svc_serv *serv) lockd_down(); nfsd_serv = NULL; nfsd_racache_shutdown(); - nfs4_state_shutdown(); + nfsd_shutdown(); printk(KERN_WARNING "nfsd: last server has exited, flushing export " "cache\n"); @@ -380,6 +405,7 @@ int nfsd_svc(unsigned short port, int nrservs) { int error; + bool first_thread; mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -395,19 +421,23 @@ nfsd_svc(unsigned short port, int nrservs) error = nfsd_racache_init(2*nrservs); if (error<0) goto out; - error = nfs4_state_start(); - if (error) - goto out; + + first_thread = (nfsd_serv->sv_nrthreads == 0) && (nrservs != 0); + + if (first_thread) { + error = nfsd_startup(port, nrservs); + if (error) + goto out; + } nfsd_reset_versions(); error = nfsd_create_serv(); - if (error) - goto out; + goto out_shutdown; error = nfsd_init_socks(port); if (error) - goto failure; + goto out_destroy; error = svc_set_num_threads(nfsd_serv, NULL, nrservs); if (error == 0) @@ -416,9 +446,12 @@ nfsd_svc(unsigned short port, int nrservs) * so subtract 1 */ error = nfsd_serv->sv_nrthreads - 1; - failure: +out_destroy: svc_destroy(nfsd_serv); /* Release server */ - out: +out_shutdown: + if (error < 0 && first_thread) + nfsd_shutdown(); +out: mutex_unlock(&nfsd_mutex); return error; } -- cgit v0.10.2 From 78a8d7c8ca3f0cb5cd2a276c6fc17c8c006d0b3c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 19 Jul 2010 16:50:05 -0400 Subject: nfsd: fix error handling when starting nfsd with rpcbind down The refcounting for nfsd is a little goofy. What happens is that we create the nfsd RPC service, attach sockets to it but don't actually start the threads until someone writes to the "threads" procfile. To do this, __write_ports_addfd will create the nfsd service and then will decrement the refcount when exiting but won't actually destroy the service. This is fine when there aren't errors, but when there are this can cause later attempts to start nfsd to fail. nfsd_serv will be set, and that causes __write_versions to return EBUSY. Fix this by calling svc_destroy on nfsd_serv when this function is going to return error. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 508941c..af7469e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -950,14 +950,18 @@ static ssize_t __write_ports_addfd(char *buf) return err; err = lockd_up(); - if (err != 0) - goto out; + if (err != 0) { + svc_destroy(nfsd_serv); + return err; + } err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); - if (err < 0) + if (err < 0) { lockd_down(); + svc_destroy(nfsd_serv); + return err; + } -out: /* Decrease the count, but don't shut down the service */ nfsd_serv->sv_nrthreads--; return err; -- cgit v0.10.2 From 0cd14a061e32d4ddaadad24d86d06cc860010591 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 19 Jul 2010 16:50:06 -0400 Subject: nfsd: fix error handling in __write_ports_addxprt __write_ports_addxprt calls nfsd_create_serv. That increases the refcount of nfsd_serv (which is tracked in sv_nrthreads). The service only decrements the thread count on error, not on success like __write_ports_addfd does, so using this interface leaves the nfsd thread count high. Fix this by having this function call svc_destroy() on error to release the reference (and possibly to tear down the service) and simply decrement the refcount without tearing down the service on success. This makes the sv_threads handling work basically the same in both __write_ports_addxprt and __write_ports_addfd. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index af7469e..9e8645a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1018,6 +1018,9 @@ static ssize_t __write_ports_addxprt(char *buf) PF_INET6, port, SVC_SOCK_ANONYMOUS); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; + + /* Decrease the count, but don't shut down the service */ + nfsd_serv->sv_nrthreads--; return 0; out_close: xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); @@ -1026,8 +1029,7 @@ out_close: svc_xprt_put(xprt); } out_err: - /* Decrease the count, but don't shut down the service */ - nfsd_serv->sv_nrthreads--; + svc_destroy(nfsd_serv); return err; } -- cgit v0.10.2 From 628b368728e23188ac41b3f00411b02be8e697f1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 21 Jul 2010 16:40:08 -0400 Subject: nfsd: clean up nfsd_create_serv error handling There doesn't seem to be any need to reset the nfssvc_boot time if the nfsd startup failed. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d7a4d7b..a631ea6 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -292,10 +292,9 @@ int nfsd_create_serv(void) nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) - err = -ENOMEM; - else - set_max_drc(); + return -ENOMEM; + set_max_drc(); do_gettimeofday(&nfssvc_boot); /* record boot time */ return err; } -- cgit v0.10.2 From ac77efbe2b4d2a1e571a4f1e5b6e47de72a7d737 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 20 Jul 2010 14:10:22 -0400 Subject: nfsd: just keep single lockd reference for nfsd Right now, nfsd keeps a lockd reference for each socket that it has open. This is unnecessary and complicates the error handling on startup and shutdown. Change it to just do a lockd_up when starting the first nfsd thread just do a single lockd_down when taking down the last nfsd thread. Because of the strange way the sv_count is handled this requires an extra flag to tell whether the nfsd_serv holds a reference for lockd or not. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9e8645a..b1c5be8 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -949,15 +949,8 @@ static ssize_t __write_ports_addfd(char *buf) if (err != 0) return err; - err = lockd_up(); - if (err != 0) { - svc_destroy(nfsd_serv); - return err; - } - err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) { - lockd_down(); svc_destroy(nfsd_serv); return err; } @@ -982,9 +975,6 @@ static ssize_t __write_ports_delfd(char *buf) if (nfsd_serv != NULL) len = svc_sock_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT, toclose); - if (len >= 0) - lockd_down(); - kfree(toclose); return len; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index a631ea6..8a556ff 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -186,8 +186,16 @@ static int nfsd_startup(unsigned short port, int nrservs) { int ret; + ret = lockd_up(); + if (ret) + return ret; ret = nfs4_state_start(); + if (ret) + goto out_lockd; nfsd_up = true; + return 0; +out_lockd: + lockd_down(); return ret; } @@ -201,6 +209,7 @@ static void nfsd_shutdown(void) */ if (!nfsd_up) return; + lockd_down(); nfs4_state_shutdown(); nfsd_up = false; } @@ -208,9 +217,6 @@ static void nfsd_shutdown(void) static void nfsd_last_thread(struct svc_serv *serv) { /* When last nfsd thread exits we need to do some clean-up */ - struct svc_xprt *xprt; - list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) - lockd_down(); nfsd_serv = NULL; nfsd_racache_shutdown(); nfsd_shutdown(); @@ -310,19 +316,11 @@ static int nfsd_init_socks(int port) if (error < 0) return error; - error = lockd_up(); - if (error < 0) - return error; - error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; - error = lockd_up(); - if (error < 0) - return error; - return 0; } @@ -400,6 +398,11 @@ int nfsd_set_nrthreads(int n, int *nthreads) return err; } +/* + * Adjust the number of threads and return the new number of threads. + * This is also the function that starts the server if necessary, if + * this is the first time nrservs is nonzero. + */ int nfsd_svc(unsigned short port, int nrservs) { -- cgit v0.10.2 From 59db4a0c102e0de226a3395dbf25ea51bf845937 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 21 Jul 2010 18:29:25 -0400 Subject: nfsd: move more into nfsd_startup() This is just cleanup--it's harmless to call nfsd_rachache_init, nfsd_init_socks, and nfsd_reset_versions more than once. But there's no point to it. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 8a556ff..62a6c44 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -180,22 +180,54 @@ int nfsd_nrthreads(void) return rv; } +static int nfsd_init_socks(int port) +{ + int error; + if (!list_empty(&nfsd_serv->sv_permsocks)) + return 0; + + error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, + SVC_SOCK_DEFAULTS); + if (error < 0) + return error; + + error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, + SVC_SOCK_DEFAULTS); + if (error < 0) + return error; + + return 0; +} + static bool nfsd_up = false; static int nfsd_startup(unsigned short port, int nrservs) { int ret; - + /* + * Readahead param cache - will no-op if it already exists. + * (Note therefore results will be suboptimal if number of + * threads is modified after nfsd start.) + */ + ret = nfsd_racache_init(2*nrservs); + if (ret) + return ret; + ret = nfsd_init_socks(port); + if (ret) + goto out_racache; ret = lockd_up(); if (ret) return ret; ret = nfs4_state_start(); if (ret) goto out_lockd; + nfsd_reset_versions(); nfsd_up = true; return 0; out_lockd: lockd_down(); +out_racache: + nfsd_racache_shutdown(); return ret; } @@ -209,8 +241,9 @@ static void nfsd_shutdown(void) */ if (!nfsd_up) return; - lockd_down(); nfs4_state_shutdown(); + lockd_down(); + nfsd_racache_shutdown(); nfsd_up = false; } @@ -218,7 +251,6 @@ static void nfsd_last_thread(struct svc_serv *serv) { /* When last nfsd thread exits we need to do some clean-up */ nfsd_serv = NULL; - nfsd_racache_shutdown(); nfsd_shutdown(); printk(KERN_WARNING "nfsd: last server has exited, flushing export " @@ -305,25 +337,6 @@ int nfsd_create_serv(void) return err; } -static int nfsd_init_socks(int port) -{ - int error; - if (!list_empty(&nfsd_serv->sv_permsocks)) - return 0; - - error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, - SVC_SOCK_DEFAULTS); - if (error < 0) - return error; - - error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, - SVC_SOCK_DEFAULTS); - if (error < 0) - return error; - - return 0; -} - int nfsd_nrpools(void) { if (nfsd_serv == NULL) @@ -419,11 +432,6 @@ nfsd_svc(unsigned short port, int nrservs) if (nrservs == 0 && nfsd_serv == NULL) goto out; - /* Readahead param cache - will no-op if it already exists */ - error = nfsd_racache_init(2*nrservs); - if (error<0) - goto out; - first_thread = (nfsd_serv->sv_nrthreads == 0) && (nrservs != 0); if (first_thread) { @@ -431,16 +439,9 @@ nfsd_svc(unsigned short port, int nrservs) if (error) goto out; } - - nfsd_reset_versions(); - error = nfsd_create_serv(); if (error) goto out_shutdown; - error = nfsd_init_socks(port); - if (error) - goto out_destroy; - error = svc_set_num_threads(nfsd_serv, NULL, nrservs); if (error == 0) /* We are holding a reference to nfsd_serv which -- cgit v0.10.2 From af4718f3f996925f770e540004ec9224243d1682 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 21 Jul 2010 18:31:42 -0400 Subject: nfsd: minor nfsd_svc() cleanup More idiomatic to put the error case in the if clause. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 62a6c44..92173bd 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -443,12 +443,13 @@ nfsd_svc(unsigned short port, int nrservs) if (error) goto out_shutdown; error = svc_set_num_threads(nfsd_serv, NULL, nrservs); - if (error == 0) - /* We are holding a reference to nfsd_serv which - * we don't want to count in the return value, - * so subtract 1 - */ - error = nfsd_serv->sv_nrthreads - 1; + if (error) + goto out_destroy; + /* We are holding a reference to nfsd_serv which + * we don't want to count in the return value, + * so subtract 1 + */ + error = nfsd_serv->sv_nrthreads - 1; out_destroy: svc_destroy(nfsd_serv); /* Release server */ out_shutdown: -- cgit v0.10.2 From fa0a21269f807bb2e95b8b642c4a739714780172 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 Jul 2010 16:48:54 -0400 Subject: nfsd: bypass readahead cache when have struct file The readahead cache compensates for the fact that the NFS server currently does an open and close on every IO operation in the NFSv2 and NFSv3 case. In the NFSv4 case we have long-lived struct files associated with client opens, so there's no need for this. In fact, concurrent IO's using trying to modify the same file->f_ra may cause problems. So, don't bother with the readahead cache in that case. Note eventually we'll likely do this in the v2/v3 case as well by keeping a cache of struct files instead of struct file_ra_state's. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5ca984b..31d32ae 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -904,7 +904,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { struct inode *inode; - struct raparms *ra; mm_segment_t oldfs; __be32 err; int host_err; @@ -915,12 +914,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count)) goto out; - /* Get readahead parameters */ - ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - - if (ra && ra->p_set) - file->f_ra = ra->p_ra; - if (file->f_op->splice_read && rqstp->rq_splice_ok) { struct splice_desc sd = { .len = 0, @@ -938,16 +931,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, set_fs(oldfs); } - /* Write back readahead params */ - if (ra) { - struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; - spin_lock(&rab->pb_lock); - ra->p_ra = file->f_ra; - ra->p_set = 1; - ra->p_count--; - spin_unlock(&rab->pb_lock); - } - if (host_err >= 0) { nfsdstats.io_read += host_err; *count = host_err; @@ -1082,6 +1065,42 @@ out: return err; } +static __be32 nfsd_open_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + struct file *file; + struct inode *inode; + struct raparms *ra; + __be32 err; + + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + if (err) + return err; + + inode = file->f_path.dentry->d_inode; + + /* Get readahead parameters */ + ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); + + if (ra && ra->p_set) + file->f_ra = ra->p_ra; + + err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + + /* Write back readahead params */ + if (ra) { + struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; + spin_lock(&rab->pb_lock); + ra->p_ra = file->f_ra; + ra->p_set = 1; + ra->p_count--; + spin_unlock(&rab->pb_lock); + } + + nfsd_close(file); + return err; +} + /* * Read data from a file. count must contain the requested read count * on entry. On return, *count contains the number of bytes actually read. @@ -1100,13 +1119,8 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (err) goto out; err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); - } else { - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); - if (err) - goto out; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); - nfsd_close(file); - } + } else + err = nfsd_open_read(rqstp, fhp, offset, vec, vlen, count); out: return err; } -- cgit v0.10.2 From c3e480808685dd13f03af1a8fdda581dcb54d92c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 28 Jul 2010 10:08:57 -0400 Subject: nfsd4: don't pretend to support write delegations The delegation code mostly pretends to support either read or write delegations. However, correct support for write delegations would require, for example, breaking of delegations (and/or implementation of cb_getattr) on stat. Currently all that stops us from handing out delegations is a subtle reference-counting issue. Avoid confusion by adding an earlier check that explicitly refuses write delegations. For now, though, I'm not going so far as to rip out existing half-support for write delegations, in case we get around to using that soon. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9cc3b78..c07c988 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -170,6 +170,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; dprintk("NFSD alloc_init_deleg\n"); + /* + * Major work on the lease subsystem (for example, to support + * calbacks on stat) will be required before we can support + * write delegations properly. + */ + if (type != NFS4_OPEN_DELEGATE_READ) + return NULL; if (fp->fi_had_conflict) return NULL; if (num_delegations > max_delegations) -- cgit v0.10.2 From 21fb4016bd592409bc8f95737e365ac82413b795 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 28 Jul 2010 12:21:23 -0400 Subject: nfsd4: miscellaneous process_open2 cleanup Move more work into helper functions. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c07c988..d9c8232 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2317,10 +2317,21 @@ nfs4_alloc_stateid(void) return kmem_cache_alloc(stateid_slab, GFP_KERNEL); } +static inline int nfs4_access_to_access(u32 nfs4_access) +{ + int flags = 0; + + if (nfs4_access & NFS4_SHARE_ACCESS_READ) + flags |= NFSD_MAY_READ; + if (nfs4_access & NFS4_SHARE_ACCESS_WRITE) + flags |= NFSD_MAY_WRITE; + return flags; +} + static __be32 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, struct nfs4_delegation *dp, - struct svc_fh *cur_fh, int flags) + struct svc_fh *cur_fh, struct nfsd4_open *open) { struct nfs4_stateid *stp; @@ -2333,8 +2344,10 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, stp->st_vfs_file = dp->dl_vfs_file; } else { __be32 status; - status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, - &stp->st_vfs_file); + int access = nfs4_access_to_access(open->op_share_access); + + status = nfsd_open(rqstp, cur_fh, S_IFREG, access, + &stp->st_vfs_file); if (status) { if (status == nfserr_dropit) status = nfserr_jukebox; @@ -2531,12 +2544,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf update_stateid(&stp->st_stateid); } else { /* Stateid was not found, this is a new OPEN */ - int flags = 0; - if (open->op_share_access & NFS4_SHARE_ACCESS_READ) - flags |= NFSD_MAY_READ; - if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flags |= NFSD_MAY_WRITE; - status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags); + status = nfs4_new_open(rqstp, &stp, dp, current_fh, open); if (status) goto out; init_stateid(stp, fp, open); -- cgit v0.10.2 From 02921914170e3b7fea1cd82dac9713685d2de5e2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 29 Jul 2010 15:16:59 -0400 Subject: nfsd4: fix openmode checking on IO using lock stateid It is legal to perform a write using the lock stateid that was originally associated with a read lock, or with a file that was originally opened for read, but has since been upgraded. So, when checking the openmode, check the mode associated with the open stateid from which the lock was derived. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d9c8232..b996a4b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2779,6 +2779,9 @@ __be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags) { __be32 status = nfserr_openmode; + /* For lock stateid's, we test the parent open, not the lock: */ + if (stp->st_openstp) + stp = stp->st_openstp; if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) goto out; if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) @@ -3466,7 +3469,6 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */ - stp->st_access_bmap = open_stp->st_access_bmap; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; -- cgit v0.10.2 From f9d7562fdb9dc0ada3a7aba5dbbe9d965e2a105d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 8 Jul 2010 11:02:09 -0400 Subject: nfsd4: share file descriptors between stateid's The vfs doesn't really allow us to "upgrade" a file descriptor from read-only to read-write, and our attempt to do so in nfs4_upgrade_open is ugly and incomplete. Move to a different scheme where we keep multiple opens, shared between open stateid's, in the nfs4_file struct. Each file will be opened at most 3 times (for read, write, and read-write), and those opens will be shared between all clients and openers. On upgrade we will do another open if necessary instead of attempting to upgrade an existing open. We keep count of the number of readers and writers so we know when to close the shared files. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b996a4b..7ab572f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -162,6 +162,28 @@ static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; static struct list_head file_hashtbl[FILE_HASH_SIZE]; static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; +static inline void nfs4_file_get_access(struct nfs4_file *fp, int oflag) +{ + BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); + atomic_inc(&fp->fi_access[oflag]); +} + +static inline void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) +{ + if (fp->fi_fds[oflag]) { + fput(fp->fi_fds[oflag]); + fp->fi_fds[oflag] = NULL; + } +} + +static inline void nfs4_file_put_access(struct nfs4_file *fp, int oflag) +{ + if (atomic_dec_and_test(&fp->fi_access[oflag])) { + nfs4_file_put_fd(fp, O_RDWR); + nfs4_file_put_fd(fp, oflag); + } +} + static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) { @@ -191,9 +213,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f dp->dl_client = clp; get_nfs4_file(fp); dp->dl_file = fp; + nfs4_file_get_access(fp, O_RDONLY); dp->dl_flock = NULL; - get_file(stp->st_vfs_file); - dp->dl_vfs_file = stp->st_vfs_file; dp->dl_type = type; dp->dl_ident = cb->cb_ident; dp->dl_stateid.si_boot = boot_time; @@ -228,15 +249,12 @@ nfs4_put_delegation(struct nfs4_delegation *dp) static void nfs4_close_delegation(struct nfs4_delegation *dp) { - struct file *filp = dp->dl_vfs_file; + struct file *filp = find_readable_file(dp->dl_file); dprintk("NFSD: close_delegation dp %p\n",dp); - dp->dl_vfs_file = NULL; - /* The following nfsd_close may not actually close the file, - * but we want to remove the lease in any case. */ if (dp->dl_flock) vfs_setlease(filp, F_UNLCK, &dp->dl_flock); - nfsd_close(filp); + nfs4_file_put_access(dp->dl_file, O_RDONLY); } /* Called under the state lock. */ @@ -308,8 +326,12 @@ static void free_generic_stateid(struct nfs4_stateid *stp) static void release_lock_stateid(struct nfs4_stateid *stp) { + struct file *file; + unhash_generic_stateid(stp); - locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner); + file = find_any_file(stp->st_file); + if (file) + locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); free_generic_stateid(stp); } @@ -347,11 +369,85 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp) } } +/* + * We store the NONE, READ, WRITE, and BOTH bits separately in the + * st_{access,deny}_bmap field of the stateid, in order to track not + * only what share bits are currently in force, but also what + * combinations of share bits previous opens have used. This allows us + * to enforce the recommendation of rfc 3530 14.2.19 that the server + * return an error if the client attempt to downgrade to a combination + * of share bits not explicable by closing some of its previous opens. + * + * XXX: This enforcement is actually incomplete, since we don't keep + * track of access/deny bit combinations; so, e.g., we allow: + * + * OPEN allow read, deny write + * OPEN allow both, deny none + * DOWNGRADE allow read, deny none + * + * which we should reject. + */ +static void +set_access(unsigned int *access, unsigned long bmap) { + int i; + + *access = 0; + for (i = 1; i < 4; i++) { + if (test_bit(i, &bmap)) + *access |= i; + } +} + +static void +set_deny(unsigned int *deny, unsigned long bmap) { + int i; + + *deny = 0; + for (i = 0; i < 4; i++) { + if (test_bit(i, &bmap)) + *deny |= i ; + } +} + +static int +test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { + unsigned int access, deny; + + set_access(&access, stp->st_access_bmap); + set_deny(&deny, stp->st_deny_bmap); + if ((access & open->op_share_deny) || (deny & open->op_share_access)) + return 0; + return 1; +} + +static int nfs4_access_to_omode(u32 access) +{ + switch (access) { + case NFS4_SHARE_ACCESS_READ: + return O_RDONLY; + case NFS4_SHARE_ACCESS_WRITE: + return O_WRONLY; + case NFS4_SHARE_ACCESS_BOTH: + return O_RDWR; + } + BUG(); +} + +static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp) +{ + unsigned int access; + + set_access(&access, stp->st_access_bmap); + return nfs4_access_to_omode(access); +} + static void release_open_stateid(struct nfs4_stateid *stp) { + int oflag = nfs4_access_bmap_to_omode(stp); + unhash_generic_stateid(stp); release_stateid_lockowners(stp); - nfsd_close(stp->st_vfs_file); + nfs4_file_put_access(stp->st_file, oflag); free_generic_stateid(stp); } @@ -1763,6 +1859,8 @@ alloc_init_file(struct inode *ino) fp->fi_inode = igrab(ino); fp->fi_id = current_fileid++; fp->fi_had_conflict = false; + memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); + memset(fp->fi_access, 0, sizeof(fp->fi_access)); spin_lock(&recall_lock); list_add(&fp->fi_hash, &file_hashtbl[hashval]); spin_unlock(&recall_lock); @@ -1974,57 +2072,6 @@ static inline int deny_valid(u32 x) } /* - * We store the NONE, READ, WRITE, and BOTH bits separately in the - * st_{access,deny}_bmap field of the stateid, in order to track not - * only what share bits are currently in force, but also what - * combinations of share bits previous opens have used. This allows us - * to enforce the recommendation of rfc 3530 14.2.19 that the server - * return an error if the client attempt to downgrade to a combination - * of share bits not explicable by closing some of its previous opens. - * - * XXX: This enforcement is actually incomplete, since we don't keep - * track of access/deny bit combinations; so, e.g., we allow: - * - * OPEN allow read, deny write - * OPEN allow both, deny none - * DOWNGRADE allow read, deny none - * - * which we should reject. - */ -static void -set_access(unsigned int *access, unsigned long bmap) { - int i; - - *access = 0; - for (i = 1; i < 4; i++) { - if (test_bit(i, &bmap)) - *access |= i; - } -} - -static void -set_deny(unsigned int *deny, unsigned long bmap) { - int i; - - *deny = 0; - for (i = 0; i < 4; i++) { - if (test_bit(i, &bmap)) - *deny |= i ; - } -} - -static int -test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { - unsigned int access, deny; - - set_access(&access, stp->st_access_bmap); - set_deny(&deny, stp->st_deny_bmap); - if ((access & open->op_share_deny) || (deny & open->op_share_access)) - return 0; - return 1; -} - -/* * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid */ @@ -2055,14 +2102,12 @@ out: } static inline void -nfs4_file_downgrade(struct file *filp, unsigned int share_access) +nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access) { - if (share_access & NFS4_SHARE_ACCESS_WRITE) { - drop_file_write_access(filp); - spin_lock(&filp->f_lock); - filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; - spin_unlock(&filp->f_lock); - } + if (share_access & NFS4_SHARE_ACCESS_WRITE) + nfs4_file_put_access(fp, O_WRONLY); + if (share_access & NFS4_SHARE_ACCESS_READ) + nfs4_file_put_access(fp, O_RDONLY); } /* @@ -2328,32 +2373,42 @@ static inline int nfs4_access_to_access(u32 nfs4_access) return flags; } +static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file +*fp, struct svc_fh *cur_fh, u32 nfs4_access) +{ + __be32 status; + int oflag = nfs4_access_to_omode(nfs4_access); + int access = nfs4_access_to_access(nfs4_access); + + if (!fp->fi_fds[oflag]) { + status = nfsd_open(rqstp, cur_fh, S_IFREG, access, + &fp->fi_fds[oflag]); + if (status == nfserr_dropit) + status = nfserr_jukebox; + if (status) + return status; + } + nfs4_file_get_access(fp, oflag); + + return nfs_ok; +} + static __be32 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, - struct nfs4_delegation *dp, - struct svc_fh *cur_fh, struct nfsd4_open *open) + struct nfs4_file *fp, struct svc_fh *cur_fh, + struct nfsd4_open *open) { struct nfs4_stateid *stp; + __be32 status; stp = nfs4_alloc_stateid(); if (stp == NULL) return nfserr_resource; - if (dp) { - get_file(dp->dl_vfs_file); - stp->st_vfs_file = dp->dl_vfs_file; - } else { - __be32 status; - int access = nfs4_access_to_access(open->op_share_access); - - status = nfsd_open(rqstp, cur_fh, S_IFREG, access, - &stp->st_vfs_file); - if (status) { - if (status == nfserr_dropit) - status = nfserr_jukebox; - kmem_cache_free(stateid_slab, stp); - return status; - } + status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open->op_share_access); + if (status) { + kmem_cache_free(stateid_slab, stp); + return status; } *stpp = stp; return 0; @@ -2375,36 +2430,29 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, } static __be32 -nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) { - struct file *filp = stp->st_vfs_file; - struct inode *inode = filp->f_path.dentry->d_inode; - unsigned int share_access, new_writer; - u32 op_share_access; + u32 op_share_access, new_access; __be32 status; - set_access(&share_access, stp->st_access_bmap); - new_writer = (~share_access) & open->op_share_access - & NFS4_SHARE_ACCESS_WRITE; - - if (new_writer) { - int err = get_write_access(inode); - if (err) - return nfserrno(err); - err = mnt_want_write(cur_fh->fh_export->ex_path.mnt); - if (err) - return nfserrno(err); - file_take_write(filp); + set_access(&new_access, stp->st_access_bmap); + new_access = (~new_access) & open->op_share_access & ~NFS4_SHARE_WANT_MASK; + + if (new_access) { + status = nfs4_get_vfs_file(rqstp, fp, cur_fh, new_access); + if (status) + return status; } status = nfsd4_truncate(rqstp, cur_fh, open); if (status) { - if (new_writer) - put_write_access(inode); + if (new_access) { + int oflag = nfs4_access_to_omode(new_access); + nfs4_file_put_access(fp, oflag); + } return status; } /* remember the open */ op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; - filp->f_mode |= op_share_access; __set_bit(op_share_access, &stp->st_access_bmap); __set_bit(open->op_share_deny, &stp->st_deny_bmap); @@ -2468,13 +2516,14 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl.fl_end = OFFSET_MAX; fl.fl_owner = (fl_owner_t)dp; - fl.fl_file = stp->st_vfs_file; + fl.fl_file = find_readable_file(stp->st_file); + BUG_ON(!fl.fl_file); fl.fl_pid = current->tgid; /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callbacks fl_mylease and fl_change are used */ - if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) { + if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) { dprintk("NFSD: setlease failed [%d], no delegation\n", status); unhash_delegation(dp); flag = NFS4_OPEN_DELEGATE_NONE; @@ -2538,13 +2587,12 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - status = nfs4_upgrade_open(rqstp, current_fh, stp, open); + status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) goto out; update_stateid(&stp->st_stateid); } else { - /* Stateid was not found, this is a new OPEN */ - status = nfs4_new_open(rqstp, &stp, dp, current_fh, open); + status = nfs4_new_open(rqstp, &stp, fp, current_fh, open); if (status) goto out; init_stateid(stp, fp, open); @@ -2746,7 +2794,7 @@ search_close_lru(u32 st_id, int flags) static inline int nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) { - return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode; + return fhp->fh_dentry->d_inode != stp->st_file->fi_inode; } static int @@ -2894,7 +2942,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, goto out; renew_client(dp->dl_client); if (filpp) - *filpp = dp->dl_vfs_file; + *filpp = find_readable_file(dp->dl_file); + BUG_ON(!*filpp); } else { /* open or lock stateid */ stp = find_stateid(stateid, flags); if (!stp) @@ -2911,8 +2960,13 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, if (status) goto out; renew_client(stp->st_stateowner->so_client); - if (filpp) - *filpp = stp->st_vfs_file; + if (filpp) { + if (flags & RD_STATE) + *filpp = find_readable_file(stp->st_file); + else + *filpp = find_writeable_file(stp->st_file); + BUG_ON(!*filpp); /* assured by check_openmode */ + } } status = nfs_ok; out: @@ -3148,8 +3202,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, goto out; } set_access(&share_access, stp->st_access_bmap); - nfs4_file_downgrade(stp->st_vfs_file, - share_access & ~od->od_share_access); + nfs4_file_downgrade(stp->st_file, share_access & ~od->od_share_access); reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); @@ -3468,7 +3521,6 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; - stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */ stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -3568,7 +3620,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, lock_sop = lock->lk_replay_owner; } /* lock->lk_replay_owner and lock_stp have been created or found */ - filp = lock_stp->st_vfs_file; status = nfserr_grace; if (locks_in_grace() && !lock->lk_reclaim) @@ -3581,11 +3632,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (lock->lk_type) { case NFS4_READ_LT: case NFS4_READW_LT: + filp = find_readable_file(lock_stp->st_file); file_lock.fl_type = F_RDLCK; cmd = F_SETLK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: + filp = find_writeable_file(lock_stp->st_file); file_lock.fl_type = F_WRLCK; cmd = F_SETLK; break; @@ -3593,6 +3646,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_inval; goto out; } + if (!filp) { + status = nfserr_openmode; + goto out; + } file_lock.fl_owner = (fl_owner_t)lock_sop; file_lock.fl_pid = current->tgid; file_lock.fl_file = filp; @@ -3761,7 +3818,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &locku->lu_stateowner, &stp, NULL))) goto out; - filp = stp->st_vfs_file; + filp = find_any_file(stp->st_file); + if (!filp) { + status = nfserr_lock_range; + goto out; + } BUG_ON(!filp); locks_init_lock(&file_lock); file_lock.fl_type = F_UNLCK; @@ -3808,10 +3869,10 @@ out_nfserr: * 0: no locks held by lockowner */ static int -check_for_locks(struct file *filp, struct nfs4_stateowner *lowner) +check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner) { struct file_lock **flpp; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = filp->fi_inode; int status = 0; lock_kernel(); @@ -3862,7 +3923,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, continue; list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_vfs_file, sop)) + if (check_for_locks(stp->st_file, sop)) goto out; /* Note: so_perclient unused for lockowners, * so it's OK to fool with here. */ diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7237776..b76ac3a 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -153,6 +153,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) #define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) #define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) +#define nfserr_lock_range cpu_to_be32(NFSERR_LOCK_RANGE) #define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) #define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) #define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 006c842..7731a75 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -88,7 +88,6 @@ struct nfs4_delegation { struct nfs4_client *dl_client; struct nfs4_file *dl_file; struct file_lock *dl_flock; - struct file *dl_vfs_file; u32 dl_type; time_t dl_time; /* For recall: */ @@ -342,12 +341,50 @@ struct nfs4_file { struct list_head fi_hash; /* hash by "struct inode *" */ struct list_head fi_stateids; struct list_head fi_delegations; + /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ + struct file * fi_fds[3]; + /* One each for O_RDONLY, O_WRONLY: */ + atomic_t fi_access[2]; + /* + * Each open stateid contributes 1 to either fi_readers or + * fi_writers, or both, depending on the open mode. A + * delegation also takes an fi_readers reference. Lock + * stateid's take none. + */ + atomic_t fi_readers; + atomic_t fi_writers; struct inode *fi_inode; u32 fi_id; /* used with stateowner->so_id * for stateid_hashtbl hash */ bool fi_had_conflict; }; +/* XXX: for first cut may fall back on returning file that doesn't work + * at all? */ +static inline struct file *find_writeable_file(struct nfs4_file *f) +{ + if (f->fi_fds[O_RDWR]) + return f->fi_fds[O_RDWR]; + return f->fi_fds[O_WRONLY]; +} + +static inline struct file *find_readable_file(struct nfs4_file *f) +{ + if (f->fi_fds[O_RDWR]) + return f->fi_fds[O_RDWR]; + return f->fi_fds[O_RDONLY]; +} + +static inline struct file *find_any_file(struct nfs4_file *f) +{ + if (f->fi_fds[O_RDWR]) + return f->fi_fds[O_RDWR]; + else if (f->fi_fds[O_RDWR]) + return f->fi_fds[O_WRONLY]; + else + return f->fi_fds[O_RDONLY]; +} + /* * nfs4_stateid can either be an open stateid or (eventually) a lock stateid * @@ -373,7 +410,6 @@ struct nfs4_stateid { struct nfs4_stateowner * st_stateowner; struct nfs4_file * st_file; stateid_t st_stateid; - struct file * st_vfs_file; unsigned long st_access_bmap; unsigned long st_deny_bmap; struct nfs4_stateid * st_openstp; -- cgit v0.10.2 From 69049961014992f50b10d6c3cd3cd172d4aae5ac Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 20 Jul 2010 15:24:27 -0700 Subject: gcc-4.6: nfsd: fix initialized but not read warnings Fixes at least one real minor bug: the nfs4 recovery dir sysctl would not return its status properly. Also I finished Al's 1e41568d7378d ("Take ima_path_check() in nfsd past dentry_open() in nfsd_open()") commit, it moved the IMA code, but left the old path initializer in there. The rest is just dead code removed I think, although I was not fully sure about the "is_borc" stuff. Some more review would be still good. Found by gcc 4.6's new warnings. Signed-off-by: Andi Kleen Cc: Al Viro Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7ab572f..0b4351f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3421,11 +3421,9 @@ static inline void nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) { struct nfs4_stateowner *sop; - unsigned int hval; if (fl->fl_lmops == &nfsd_posix_mng_ops) { sop = (struct nfs4_stateowner *) fl->fl_owner; - hval = lockownerid_hashval(sop->so_id); kref_get(&sop->so_ref); deny->ld_sop = sop; deny->ld_clientid = sop->so_client->cl_clientid; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b1c5be8..12f0ee7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1306,6 +1306,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) return -EINVAL; status = nfs4_reset_recoverydir(recdir); + if (status) + return status; } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a047ad6..1edb78b 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -290,7 +290,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, * gospel of sun micro */ if (type != S_IFREG) { - int is_borc = 0; if (type != S_IFBLK && type != S_IFCHR) { rdev = 0; } else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) { @@ -298,7 +297,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, type = S_IFIFO; } else { /* Okay, char or block special */ - is_borc = 1; if (!rdev) rdev = wanted; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 31d32ae..3458a8f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2052,7 +2052,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct dentry *dentry, int acc) { struct inode *inode = dentry->d_inode; - struct path path; int err; if (acc == NFSD_MAY_NOP) @@ -2125,15 +2124,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, if (err == -EACCES && S_ISREG(inode->i_mode) && acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) err = inode_permission(inode, MAY_EXEC); - if (err) - goto nfsd_out; - /* Do integrity (permission) checking now, but defer incrementing - * IMA counts to the actual file open. - */ - path.mnt = exp->ex_path.mnt; - path.dentry = dentry; -nfsd_out: return err? nfserrno(err) : 0; } -- cgit v0.10.2 From 039a87ca536a85bc169ce092e44bd57adfa1f563 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 30 Jul 2010 11:33:32 -0400 Subject: nfsd: minor nfsd read api cleanup Christoph points that the NFSv2/v3 callers know which case they want here, so we may as well just call the file=NULL case directly instead of making this conditional. Cc: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 9ae9331..5b7e302 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -168,7 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_read(rqstp, &resp->fh, NULL, + nfserr = nfsd_read(rqstp, &resp->fh, argp->offset, rqstp->rq_vec, argp->vlen, &resp->count); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 835924f..f8931ac 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2630,7 +2630,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, } read->rd_vlen = v; - nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, + nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 1edb78b..08e1726 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -144,7 +144,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); resp->count = argp->count; - nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, + nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, rqstp->rq_vec, argp->vlen, &resp->count); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 3458a8f..1709138 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1065,7 +1065,12 @@ out: return err; } -static __be32 nfsd_open_read(struct svc_rqst *rqstp, struct svc_fh *fhp, +/* + * Read data from a file. count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + * N.B. After this call fhp needs an fh_put + */ +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { struct file *file; @@ -1101,13 +1106,9 @@ static __be32 nfsd_open_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; } -/* - * Read data from a file. count must contain the requested read count - * on entry. On return, *count contains the number of bytes actually read. - * N.B. After this call fhp needs an fh_put - */ +/* As above, but use the provided file descriptor. */ __be32 -nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, +nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { @@ -1119,8 +1120,8 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (err) goto out; err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); - } else - err = nfsd_open_read(rqstp, fhp, offset, vec, vlen, count); + } else /* Note file may still be NULL in NFSv4 special stateid case: */ + err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); out: return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 217a62c..9a370a5 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -64,7 +64,9 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, int, struct file **); void nfsd_close(struct file *); -__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, +__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, + loff_t, struct kvec *, int, unsigned long *); +__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, loff_t, struct kvec *,int, unsigned long *, int *); -- cgit v0.10.2 From 774f8bbd9ef2e71d4ef4b89933d292091d31ca98 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Aug 2010 14:12:44 -0400 Subject: nfsd: fix startup/shutdown order bug We must create the server before we can call init_socks or check the number of threads. Symptoms were a NULL pointer dereference in nfsd_svc(). Problem identified by Jeff Layton. Also fix a minor cleanup-on-error case in nfsd_startup(). Reported-by: Tetsuo Handa Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 92173bd..39ced4a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -204,6 +204,9 @@ static bool nfsd_up = false; static int nfsd_startup(unsigned short port, int nrservs) { int ret; + + if (nfsd_up) + return 0; /* * Readahead param cache - will no-op if it already exists. * (Note therefore results will be suboptimal if number of @@ -217,7 +220,7 @@ static int nfsd_startup(unsigned short port, int nrservs) goto out_racache; ret = lockd_up(); if (ret) - return ret; + goto out_racache; ret = nfs4_state_start(); if (ret) goto out_lockd; @@ -420,7 +423,7 @@ int nfsd_svc(unsigned short port, int nrservs) { int error; - bool first_thread; + bool nfsd_up_before; mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -432,29 +435,28 @@ nfsd_svc(unsigned short port, int nrservs) if (nrservs == 0 && nfsd_serv == NULL) goto out; - first_thread = (nfsd_serv->sv_nrthreads == 0) && (nrservs != 0); - - if (first_thread) { - error = nfsd_startup(port, nrservs); - if (error) - goto out; - } error = nfsd_create_serv(); if (error) - goto out_shutdown; - error = svc_set_num_threads(nfsd_serv, NULL, nrservs); + goto out; + + nfsd_up_before = nfsd_up; + + error = nfsd_startup(port, nrservs); if (error) goto out_destroy; + error = svc_set_num_threads(nfsd_serv, NULL, nrservs); + if (error) + goto out_shutdown; /* We are holding a reference to nfsd_serv which * we don't want to count in the return value, * so subtract 1 */ error = nfsd_serv->sv_nrthreads - 1; -out_destroy: - svc_destroy(nfsd_serv); /* Release server */ out_shutdown: - if (error < 0 && first_thread) + if (error < 0 && !nfsd_up_before) nfsd_shutdown(); +out_destroy: + svc_destroy(nfsd_serv); /* Release server */ out: mutex_unlock(&nfsd_mutex); return error; -- cgit v0.10.2 From c18c821fd40ad0ffc199a55be874e556bf999416 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 29 Jun 2010 14:33:55 +0300 Subject: nfsd41: Fix a crash when a callback is retried If a callback is retried at nfsd4_cb_recall_done() due to some error, the returned rpc reply crashes here: @@ -514,6 +514,7 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, u32 dummy; __be32 *p; + BUG_ON(!res); if (res->cbs_minorversion == 0) return 0; [BUG_ON added for demonstration] This is because the nfsd4_cb_done_sequence() has NULLed out the task->tk_msg.rpc_resp pointer. Also eventually the rpc would use the new slot without making sure it is free by calling nfsd41_cb_setup_sequence(). This problem was introduced by a 4.1 protocol addition patch: [0421b5c5] nfsd41: Backchannel: Implement cb_recall over NFSv4.1 Which was overlooking the possibility of an RPC callback retries. For not-4.1 case redoing the _prepare is harmless. Signed-off-by: Boaz Harrosh Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1e6497e..988cbb3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -697,7 +697,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) if (dp->dl_retries--) { rpc_delay(task, 2*HZ); task->tk_status = 0; - rpc_restart_call(task); + rpc_restart_call_prepare(task); return; } else { atomic_set(&clp->cl_cb_set, 0); -- cgit v0.10.2 From e2aa7f8304b3b5656ded8699216cc65138d03b64 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Thu, 5 Aug 2010 15:51:36 +0200 Subject: net: sunrpc: removed duplicated #include Signed-off-by: Andrea Gelmini Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 939d048..2b06410 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -34,7 +34,6 @@ #include #include #include -#include #define RPCDBG_FACILITY RPCDBG_CACHE -- cgit v0.10.2 From e844a7b9805a2b74cfd34c8604f5bba3e0869305 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 6 Aug 2010 15:48:03 -0400 Subject: nfsd: initialize nfsd versions before creating svc Commit 59db4a0c102e0de226a3395dbf25ea51bf845937 "nfsd: move more into nfsd_startup()" inadvertently moved nfsd_versions after nfsd_create_svc(). On older distributions using an rpc.nfsd that does not explicitly set the list of nfsd versions, this results in svc-create_pooled() being called with an empty versions array. The resulting incomplete initialization leads to a NULL dereference in svc_process_common() the first time a client accesses the server. Move nfsd_reset_versions() back before the svc_create_pooled(); this time, put it closer to the svc_create_pooled() call, to make this mistake more difficult in the future. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 39ced4a..e2c4346 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -224,7 +224,6 @@ static int nfsd_startup(unsigned short port, int nrservs) ret = nfs4_state_start(); if (ret) goto out_lockd; - nfsd_reset_versions(); nfsd_up = true; return 0; out_lockd: @@ -329,6 +328,7 @@ int nfsd_create_serv(void) nfsd_max_blksize >= 8*1024*2) nfsd_max_blksize /= 2; } + nfsd_reset_versions(); nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd_last_thread, nfsd, THIS_MODULE); -- cgit v0.10.2 From 7fa53cc872332b265bc5ba1266f39586f218ad4a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 6 Aug 2010 18:00:33 -0400 Subject: nfsd: don't allow setting maxblksize after svc created It's harmless to set this after the server is created, but also ineffective, since the value is only used at the time of svc_create_pooled(). So fail the attempt, in keeping with the pattern set by write_versions, write_{lease,grace}time and write_recoverydir. (This could break userspace that tried to write to nfsd/max_block_size between setting up sockets and starting the server. However, such code wouldn't have worked anyway, and I don't know of any examples--rpc.nfsd in nfs-utils, probably the only user of the interface, doesn't do that.) Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 12f0ee7..b53b1d0 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1190,7 +1190,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) bsize = NFSSVC_MAXBLKSIZE; bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); - if (nfsd_serv && nfsd_serv->sv_nrthreads) { + if (nfsd_serv) { mutex_unlock(&nfsd_mutex); return -EBUSY; } -- cgit v0.10.2 From 998db52c03cd293d16a457f1b396cea932244147 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 7 Aug 2010 09:21:41 -0400 Subject: nfsd4: fix file open accounting for RDWR opens Commit f9d7562fdb9dc0ada3a7aba5dbbe9d965e2a105d "nfsd4: share file descriptors between stateid's" didn't correctly account for O_RDWR opens. Symptoms include leaked files, resulting in failures to unmount and/or warnings about orphaned inodes on reboot. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0b4351f..0a02491 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -162,13 +162,22 @@ static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; static struct list_head file_hashtbl[FILE_HASH_SIZE]; static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; -static inline void nfs4_file_get_access(struct nfs4_file *fp, int oflag) +static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) { BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); atomic_inc(&fp->fi_access[oflag]); } -static inline void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) +static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) +{ + if (oflag == O_RDWR) { + __nfs4_file_get_access(fp, O_RDONLY); + __nfs4_file_get_access(fp, O_WRONLY); + } else + __nfs4_file_get_access(fp, oflag); +} + +static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) { if (fp->fi_fds[oflag]) { fput(fp->fi_fds[oflag]); @@ -176,7 +185,7 @@ static inline void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) } } -static inline void nfs4_file_put_access(struct nfs4_file *fp, int oflag) +static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, O_RDWR); @@ -184,6 +193,15 @@ static inline void nfs4_file_put_access(struct nfs4_file *fp, int oflag) } } +static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) +{ + if (oflag == O_RDWR) { + __nfs4_file_put_access(fp, O_RDONLY); + __nfs4_file_put_access(fp, O_WRONLY); + } else + __nfs4_file_put_access(fp, oflag); +} + static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) { -- cgit v0.10.2