From c551866e649bac66a5145d100f34086d6edb581e Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Fri, 11 Dec 2009 19:10:49 -0800 Subject: nfsd41: nfsd4_decode_compound() does not recognize all ops The server incorrectly assumes that the operations in the array start with value 0. The first operation (OP_ACCESS) has a value of 3, causing the check in nfsd4_decode_compound to be off. Instead of comparing that the operation number is less than the number of elements in the array, the server should verify that it is less than the maximum valid operation number defined by LAST_NFS4_OP. Signed-off-by: Ricardo Labiaga Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a8587e9..4f14f0c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1434,7 +1434,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } op->opnum = ntohl(*argp->p++); - if (op->opnum >= OP_ACCESS && op->opnum < ops->nops) + if (op->opnum >= OP_ACCESS && op->opnum <= LAST_NFS4_OP) op->status = ops->decoders[op->opnum](argp, &op->u); else { op->opnum = OP_ILLEGAL; -- cgit v0.10.2 From de3cab793c6a5c8505d66bee111edcc7098380ba Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Fri, 11 Dec 2009 20:03:27 -0800 Subject: nfsd4: Use FIRST_NFS4_OP in nfsd4_decode_compound() Since we're checking for LAST_NFS4_OP, use FIRST_NFS4_OP to be consistent. Signed-off-by: Ricardo Labiaga Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4f14f0c..c458fb1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1434,7 +1434,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } op->opnum = ntohl(*argp->p++); - if (op->opnum >= OP_ACCESS && op->opnum <= LAST_NFS4_OP) + if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) op->status = ops->decoders[op->opnum](argp, &op->u); else { op->opnum = OP_ILLEGAL; -- cgit v0.10.2 From 6a68f89ee1f2d177af4a5410fa7a45734c975fd6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Dec 2009 17:45:13 +0100 Subject: nfsd: use vfs_fsync for non-directories Instead of opencoding the fsync calling sequence use vfs_fsync. This also gets rid of the useless i_mutex over the data writeout. Consolidate the remaining special code for syncing directories and document its quirks. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c194793..79d216f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -769,40 +769,25 @@ nfsd_close(struct file *filp) } /* - * Sync a file - * As this calls fsync (not fdatasync) there is no need for a write_inode - * after it. + * Sync a directory to disk. 
+ * + * We can't just call vfs_fsync because our requirements are slightly odd: + * + * a) we do not have a file struct available + * b) we expect to have i_mutex already held by the caller */ -static inline int nfsd_dosync(struct file *filp, struct dentry *dp, - const struct file_operations *fop) -{ - struct inode *inode = dp->d_inode; - int (*fsync) (struct file *, struct dentry *, int); - int err; - - err = filemap_write_and_wait(inode->i_mapping); - if (err == 0 && fop && (fsync = fop->fsync)) - err = fsync(filp, dp, 0); - return err; -} - -static int -nfsd_sync(struct file *filp) +int +nfsd_sync_dir(struct dentry *dentry) { - int err; - struct inode *inode = filp->f_path.dentry->d_inode; - dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name); - mutex_lock(&inode->i_mutex); - err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op); - mutex_unlock(&inode->i_mutex); + struct inode *inode = dentry->d_inode; + int error; - return err; -} + WARN_ON(!mutex_is_locked(&inode->i_mutex)); -int -nfsd_sync_dir(struct dentry *dp) -{ - return nfsd_dosync(NULL, dp, dp->d_inode->i_fop); + error = filemap_write_and_wait(inode->i_mapping); + if (!error && inode->i_fop->fsync) + error = inode->i_fop->fsync(NULL, dentry, 0); + return error; } /* @@ -1008,7 +993,7 @@ static int wait_for_concurrent_writes(struct file *file) if (inode->i_state & I_DIRTY) { dprintk("nfsd: write sync %d\n", task_pid_nr(current)); - err = nfsd_sync(file); + err = vfs_fsync(file, file->f_path.dentry, 0); } last_ino = inode->i_ino; last_dev = inode->i_sb->s_dev; @@ -1177,7 +1162,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; if (EX_ISSYNC(fhp->fh_export)) { if (file->f_op && file->f_op->fsync) { - err = nfserrno(nfsd_sync(file)); + err = nfserrno(vfs_fsync(file, file->f_path.dentry, 0)); } else { err = nfserr_notsupp; } -- cgit v0.10.2 From 8b8aae4009349397fffe7bd38a8fa200c9a5bcad Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Fri, 11 Dec 2009 19:10:48 -0800 
Subject: nfsd41: Create the recovery entry for the NFSv4.1 client Signed-off-by: Ricardo Labiaga Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f19ed86..3a20c09 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2480,8 +2480,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); - if (nfsd4_has_session(&resp->cstate)) + if (nfsd4_has_session(&resp->cstate)) { open->op_stateowner->so_confirmed = 1; + nfsd4_create_clid_dir(open->op_stateowner->so_client); + } /* * Attempt to hand out a delegation. No error return, because the -- cgit v0.10.2 From 73834d6f90f6833663f9effd4cf9b79b63bc36e1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Jan 2010 17:17:04 -0500 Subject: nfsd: 4.1 has an rfc number No need to refer to an internet draft; there's an RFC now. Signed-off-by: J. Bruce Fields diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index 1bd0d0c..6a53a84 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt @@ -17,8 +17,7 @@ kernels must turn 4.1 on or off *before* turning support for version 4 on or off; rpc.nfsd does this correctly.) The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based -on the latest NFSv4.1 Internet Draft: -http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29 +on RFC 5661. From the many new features in NFSv4.1 the current implementation focuses on the mandatory-to-implement NFSv4.1 Sessions, providing @@ -44,7 +43,7 @@ interoperability problems with future clients. Known issues: trunking, but this is a mandatory feature, and its use is recommended to clients in a number of places. (E.g. to ensure timely renewal in case an existing connection's retry timeouts - have gotten too long; see section 8.3 of the draft.) 
+ have gotten too long; see section 8.3 of the RFC.) Therefore, lack of this feature may cause future clients to fail. - Incomplete backchannel support: incomplete backchannel gss -- cgit v0.10.2 From 07396051a5c6901693a97e35cb731a01b0b348e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 Jan 2010 14:03:47 -0500 Subject: SUNRPC: Use rpc_pton() in ip_map_parse() The existing logic in ip_map_parse() can not currently parse shorthanded IPv6 addresses (anything with a double colon), nor can it parse an IPv6 presentation address with a scope ID. An IPv6-enabled mountd can pass down both. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d8c0411..97f0e9e 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -15,6 +15,7 @@ #include #define RPCDBG_FACILITY RPCDBG_AUTH +#include /* * AUTHUNIX and AUTHNULL credentials are both handled here. @@ -187,10 +188,13 @@ static int ip_map_parse(struct cache_detail *cd, * for scratch: */ char *buf = mesg; int len; - int b1, b2, b3, b4, b5, b6, b7, b8; - char c; char class[8]; - struct in6_addr addr; + union { + struct sockaddr sa; + struct sockaddr_in s4; + struct sockaddr_in6 s6; + } address; + struct sockaddr_in6 sin6; int err; struct ip_map *ipmp; @@ -209,24 +213,24 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) == 4) { - addr.s6_addr32[0] = 0; - addr.s6_addr32[1] = 0; - addr.s6_addr32[2] = htonl(0xffff); - addr.s6_addr32[3] = - htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - } else if (sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x%c", - &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) { - addr.s6_addr16[0] = htons(b1); - addr.s6_addr16[1] = htons(b2); - addr.s6_addr16[2] = htons(b3); - addr.s6_addr16[3] = htons(b4); - addr.s6_addr16[4] = htons(b5); - addr.s6_addr16[5] = htons(b6); - 
addr.s6_addr16[6] = htons(b7); - addr.s6_addr16[7] = htons(b8); - } else + if (rpc_pton(buf, len, &address.sa, sizeof(address)) == 0) return -EINVAL; + switch (address.sa.sa_family) { + case AF_INET: + /* Form a mapped IPv4 address in sin6 */ + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + memcpy(&sin6, &address.s6, sizeof(sin6)); + break; +#endif + default: + return -EINVAL; + } expiry = get_expiry(&mesg); if (expiry ==0) @@ -243,7 +247,8 @@ static int ip_map_parse(struct cache_detail *cd, } else dom = NULL; - ipmp = ip_map_lookup(class, &addr); + /* IPv6 scope IDs are ignored for now */ + ipmp = ip_map_lookup(class, &sin6.sin6_addr); if (ipmp) { err = ip_map_update(ipmp, container_of(dom, struct unix_domain, h), -- cgit v0.10.2 From 205ba42308729f4f41f21d314a4435e7de5c9a2e Mon Sep 17 00:00:00 2001 From: Aime Le Rouzic Date: Tue, 26 Jan 2010 14:03:56 -0500 Subject: NFSD: Support AF_INET6 in svc_addsock() function Relax the address family check at the top of svc_addsock() to allow AF_INET6 listener sockets to be specified via /proc/fs/nfsd/portlist. Signed-off-by: Aime Le Rouzic Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 870929e..9e09391 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1357,7 +1357,7 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, if (!so) return err; - if (so->sk->sk_family != AF_INET) + if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) err = -EAFNOSUPPORT; else if (so->sk->sk_protocol != IPPROTO_TCP && so->sk->sk_protocol != IPPROTO_UDP) -- cgit v0.10.2 From d6783b2b6c4050df0ba0a84c6842cf5bc2212ef9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 Jan 2010 14:04:04 -0500 Subject: SUNRPC: Bury "#ifdef IPV6" in svc_create_xprt() Clean up: Bruce observed we have more or less common logic in each of svc_create_xprt()'s callers: the check to create an IPv6 RPC listener socket only if CONFIG_IPV6 is set. I'm about to add another case that does just the same. If we move the ifdefs into __svc_xpo_create(), then svc_create_xprt() call sites can get rid of the "#ifdef" ugliness, and can use the same logic with or without IPv6 support available in the kernel. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index e50cfa3..7d15051 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -243,11 +243,9 @@ static int make_socks(struct svc_serv *serv) if (err < 0) goto out_err; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) err = create_lockd_family(serv, PF_INET6); if (err < 0 && err != -EAFNOSUPPORT) goto out_err; -#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */ warned = 0; return 0; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 73ab220..36dfdae 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -118,7 +118,6 @@ nfs4_callback_up(struct svc_serv *serv) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ret = svc_create_xprt(serv, "tcp", PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { @@ -129,7 +128,6 @@ nfs4_callback_up(struct svc_serv *serv) ret = 0; else goto out_err; -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ return svc_prepare_thread(serv, &serv->sv_pools[0]); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 7d1f9e9..f886ff3 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -173,11 +173,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, .sin6_port = htons(port), }; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ struct sockaddr *sap; size_t len; @@ -186,10 +188,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, sap = (struct sockaddr *)&sin; len = sizeof(sin); break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: sap = (struct sockaddr *)&sin6; len = sizeof(sin6); break; +#endif /* defined(CONFIG_IPV6) || 
defined(CONFIG_IPV6_MODULE) */ default: return ERR_PTR(-EAFNOSUPPORT); } -- cgit v0.10.2 From 68717908155a9dcd4161f4d730fea478712d9794 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 Jan 2010 14:04:13 -0500 Subject: SUNRPC: NFS kernel APIs shouldn't return ENOENT for "transport not found" write_ports() converts svc_create_xprt()'s ENOENT error return to EPROTONOSUPPORT so that rpc.nfsd (in user space) can report an error message that makes sense. It turns out that several of the other kernel APIs rpc.nfsd use can also return ENOENT from svc_create_xprt(), by way of lockd_up(). On the client side, an NFSv2 or NFSv3 mount request can also return the result of lockd_up(). This error may also be returned during an NFSv4 mount request, since the NFSv4 callback service uses svc_create_xprt() to create the callback listener. An ENOENT error return results in a confusing error message from the mount command. Let's have svc_create_xprt() return EPROTONOSUPPORT instead of ENOENT. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2604c3e..f43ecd6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1002,12 +1002,8 @@ static ssize_t __write_ports_addxprt(char *buf) err = svc_create_xprt(nfsd_serv, transport, PF_INET, port, SVC_SOCK_ANONYMOUS); - if (err < 0) { - /* Give a reasonable perror msg for bad transport string */ - if (err == -ENOENT) - err = -EPROTONOSUPPORT; + if (err < 0) return err; - } return 0; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f886ff3..d7ec5ca 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -235,7 +235,10 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, err: spin_unlock(&svc_xprt_class_lock); dprintk("svc: transport %s not found\n", xprt_name); - return -ENOENT; + + /* This errno is exposed to user space. Provide a reasonable + * perror msg for a bad transport. 
*/ + return -EPROTONOSUPPORT; } EXPORT_SYMBOL_GPL(svc_create_xprt); -- cgit v0.10.2 From 37498292aa97658a5d0a9bb84699ce8c1016bb74 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 Jan 2010 14:04:22 -0500 Subject: NFSD: Create PF_INET6 listener in write_ports Try to create a PF_INET6 listener for NFSD, if IPv6 is enabled in the kernel. Make sure nfsd_serv's reference count is decreased if __write_ports_addxprt() failed to create a listener. See __write_ports_addfd(). Our current plan is to rely on rpc.nfsd to create appropriate IPv6 listeners when server-side NFS/IPv6 support is desired. Legacy behavior, via the write_threads or write_svc kernel APIs, will remain the same -- only IPv4 listeners are created. Signed-off-by: Chuck Lever [bfields@citi.umich.edu: Move error-handling code to end] Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f43ecd6..0f0e77f 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -988,6 +988,7 @@ static ssize_t __write_ports_delfd(char *buf) static ssize_t __write_ports_addxprt(char *buf) { char transport[16]; + struct svc_xprt *xprt; int port, err; if (sscanf(buf, "%15s %4u", transport, &port) != 2) @@ -1003,8 +1004,23 @@ static ssize_t __write_ports_addxprt(char *buf) err = svc_create_xprt(nfsd_serv, transport, PF_INET, port, SVC_SOCK_ANONYMOUS); if (err < 0) - return err; + goto out_err; + + err = svc_create_xprt(nfsd_serv, transport, + PF_INET6, port, SVC_SOCK_ANONYMOUS); + if (err < 0 && err != -EAFNOSUPPORT) + goto out_close; return 0; +out_close: + xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); + if (xprt != NULL) { + svc_close_xprt(xprt); + svc_xprt_put(xprt); + } +out_err: + /* Decrease the count, but don't shut down the service */ + nfsd_serv->sv_nrthreads--; + return err; } /* -- cgit v0.10.2 From aa696a6f349638428982bb52763f4cda851632fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Jan 2010 16:44:25 -0500 Subject: nfsd: Use vfs_fsync_range() in 
nfsd_commit The NFS COMMIT operation allows the client to specify the exact byte range that it wishes to sync to disk in order to optimise server performance. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 79d216f..ed024d3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1141,8 +1141,9 @@ out: #ifdef CONFIG_NFSD_V3 /* * Commit all pending writes to stable storage. - * Strictly speaking, we could sync just the indicated file region here, - * but there's currently no way we can ask the VFS to do so. + * + * Note: we only guarantee that data that lies within the range specified + * by the 'offset' and 'count' parameters will be synced. * * Unfortunately we cannot lock the file to make sure we return full WCC * data to the client, as locking happens lower down in the filesystem. @@ -1152,23 +1153,32 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long count) { struct file *file; - __be32 err; + loff_t end = LLONG_MAX; + __be32 err = nfserr_inval; - if ((u64)count > ~(u64)offset) - return nfserr_inval; + if (offset < 0) + goto out; + if (count != 0) { + end = offset + (loff_t)count - 1; + if (end < offset) + goto out; + } err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); if (err) - return err; + goto out; if (EX_ISSYNC(fhp->fh_export)) { - if (file->f_op && file->f_op->fsync) { - err = nfserrno(vfs_fsync(file, file->f_path.dentry, 0)); - } else { + int err2 = vfs_fsync_range(file, file->f_path.dentry, + offset, end, 0); + + if (err2 != -EINVAL) + err = nfserrno(err2); + else err = nfserr_notsupp; - } } nfsd_close(file); +out: return err; } #endif /* CONFIG_NFSD_V3 */ -- cgit v0.10.2 From cdd30fa1664e0245fa64330c7cc2ddab7e47c223 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Feb 2010 15:09:12 -0500 Subject: lockd: release reference to nsm_handle in nlm_host_rebooted nsm_reboot_lookup takes a reference to the nsm_handle 
that it returns, but nlm_host_rebooted never releases that reference. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 4600c20..bb464d1210 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -479,8 +479,8 @@ again: mutex_lock(&nlm_host_mutex); } } } - mutex_unlock(&nlm_host_mutex); + nsm_release(nsm); } /* -- cgit v0.10.2 From 7e469af97eed947ba9204712601281a69ae8eb6c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Feb 2010 15:09:22 -0500 Subject: lockd: don't clear sm_monitored on nsm_reboot_lookup When lockd gets a notify downcall from statd, it'll search its hosts cache and then clear the sm_monitored bit on the host it finds. The idea is apparently to make lockd redo a SM_MON on the next lock request. This is unnecessary and causes the kernel's NSM cache to go out of sync with statd. statd doesn't stop monitoring a host when it gets a SM_NOTIFY and there's no guarantee that another lock will occur after the reclaim and before the unmount. In that event, no SM_UNMON will occur. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index f956651..fefa4df 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -349,9 +349,9 @@ retry: * nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle * @info: pointer to NLMPROC_SM_NOTIFY arguments * - * Returns a matching nsm_handle if found in the nsm cache; the returned - * nsm_handle's reference count is bumped and sm_monitored is cleared. - * Otherwise returns NULL if some error occurred. + * Returns a matching nsm_handle if found in the nsm cache. The returned + * nsm_handle's reference count is bumped. Otherwise returns NULL if some + * error occurred. 
*/ struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info) { @@ -370,12 +370,6 @@ struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info) atomic_inc(&cached->sm_count); spin_unlock(&nsm_lock); - /* - * During subsequent lock activity, force a fresh - * notification to be set up for this host. - */ - cached->sm_monitored = 0; - dprintk("lockd: host %s (%s) rebooted, cnt %d\n", cached->sm_name, cached->sm_addrbuf, atomic_read(&cached->sm_count)); -- cgit v0.10.2 From f501912a35c02eadc55ca9396ece55fe36f785d0 Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Wed, 17 Feb 2010 14:05:11 -0600 Subject: commit_metadata export operation replacing nfsd_sync_dir - Add commit_metadata export_operation to allow the underlying filesystem to decide how to commit an inode most efficiently. - Usage of nfsd_sync_dir and write_inode_now has been replaced with the commit_metadata function that takes a svc_fh. - The commit_metadata function calls the commit_metadata export_op if it's there, or else falls back to sync_inode instead of fsync and write_inode_now because only metadata need be synced here. - nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static Signed-off-by: Ben Myers Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5a754f7..98fb98e 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -119,9 +119,7 @@ out_no_tfm: static void nfsd4_sync_rec_dir(void) { - mutex_lock(&rec_dir.dentry->d_inode->i_mutex); - nfsd_sync_dir(rec_dir.dentry); - mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); + vfs_fsync(NULL, rec_dir.dentry, 0); } int diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ed024d3..8afdba5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NFSD_V3 #include "xdr3.h" @@ -271,6 +273,32 @@ out: return err; } +/* + * Commit metadata changes to stable storage. 
+ */ +static int +commit_metadata(struct svc_fh *fhp) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + const struct export_operations *export_ops = inode->i_sb->s_export_op; + int error = 0; + + if (!EX_ISSYNC(fhp->fh_export)) + return 0; + + if (export_ops->commit_metadata) { + error = export_ops->commit_metadata(inode); + } else { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, /* metadata only */ + }; + + error = sync_inode(inode, &wbc); + } + + return error; +} /* * Set various file attributes. @@ -769,28 +797,6 @@ nfsd_close(struct file *filp) } /* - * Sync a directory to disk. - * - * We can't just call vfs_fsync because our requirements are slightly odd: - * - * a) we do not have a file struct available - * b) we expect to have i_mutex already held by the caller - */ -int -nfsd_sync_dir(struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - int error; - - WARN_ON(!mutex_is_locked(&inode->i_mutex)); - - error = filemap_write_and_wait(inode->i_mapping); - if (!error && inode->i_fop->fsync) - error = inode->i_fop->fsync(NULL, dentry, 0); - return error; -} - -/* * Obtain the readahead parameters for the file * specified by (dev, ino). */ @@ -1331,12 +1337,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } - if (EX_ISSYNC(fhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(dentry)); - write_inode_now(dchild->d_inode, 1); - } + err = nfsd_create_setattr(rqstp, resfhp, iap); - err2 = nfsd_create_setattr(rqstp, resfhp, iap); + /* + * nfsd_setattr already committed the child. Transactional filesystems + * had a chance to commit changes for both parent and child + * simultaneously making the following commit_metadata a noop. 
+ */ + err2 = nfserrno(commit_metadata(fhp)); if (err2) err = err2; mnt_drop_write(fhp->fh_export->ex_path.mnt); @@ -1368,7 +1376,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry, *dchild = NULL; struct inode *dirp; __be32 err; - __be32 err2; int host_err; __u32 v_mtime=0, v_atime=0; @@ -1463,11 +1470,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (created) *created = 1; - if (EX_ISSYNC(fhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(dentry)); - /* setattr will sync the child (or not) */ - } - nfsd_check_ignore_resizing(iap); if (createmode == NFS3_CREATE_EXCLUSIVE) { @@ -1482,9 +1484,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, } set_attr: - err2 = nfsd_create_setattr(rqstp, resfhp, iap); - if (err2) - err = err2; + err = nfsd_create_setattr(rqstp, resfhp, iap); + + /* + * nfsd_setattr already committed the child (and possibly also the parent). + */ + if (!err) + err = nfserrno(commit_metadata(fhp)); mnt_drop_write(fhp->fh_export->ex_path.mnt); /* @@ -1599,12 +1605,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, } } else host_err = vfs_symlink(dentry->d_inode, dnew, path); - - if (!host_err) { - if (EX_ISSYNC(fhp->fh_export)) - host_err = nfsd_sync_dir(dentry); - } err = nfserrno(host_err); + if (!err) + err = nfserrno(commit_metadata(fhp)); fh_unlock(fhp); mnt_drop_write(fhp->fh_export->ex_path.mnt); @@ -1666,11 +1669,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, } host_err = vfs_link(dold, dirp, dnew); if (!host_err) { - if (EX_ISSYNC(ffhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(ddir)); - write_inode_now(dest, 1); - } - err = 0; + err = nfserrno(commit_metadata(ffhp)); + if (!err) + err = nfserrno(commit_metadata(tfhp)); } else { if (host_err == -EXDEV && rqstp->rq_vers == 2) err = nfserr_acces; @@ -1766,10 +1767,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out_dput_new; host_err = vfs_rename(fdir, 
odentry, tdir, ndentry); - if (!host_err && EX_ISSYNC(tfhp->fh_export)) { - host_err = nfsd_sync_dir(tdentry); + if (!host_err) { + host_err = commit_metadata(tfhp); if (!host_err) - host_err = nfsd_sync_dir(fdentry); + host_err = commit_metadata(ffhp); } mnt_drop_write(ffhp->fh_export->ex_path.mnt); @@ -1850,12 +1851,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dput(rdentry); - if (host_err) - goto out_drop; - if (EX_ISSYNC(fhp->fh_export)) - host_err = nfsd_sync_dir(dentry); + if (!host_err) + host_err = commit_metadata(fhp); -out_drop: mnt_drop_write(fhp->fh_export->ex_path.mnt); out_nfserr: err = nfserrno(host_err); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index dc12f41..a9cd507 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -96,6 +96,7 @@ struct fid { * @fh_to_parent: find the implied object's parent and get a dentry for it * @get_name: find the name for a given inode in a given directory * @get_parent: find the parent of a given directory + * @commit_metadata: commit metadata changes to stable storage * * See Documentation/filesystems/nfs/Exporting for details on how to use * this interface correctly. @@ -137,6 +138,9 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * commit_metadata: + * @commit_metadata should commit metadata changes to stable storage. 
+ * * Locking rules: * get_parent is called with child->d_inode->i_mutex down * get_name is not (which is possibly inconsistent) @@ -152,6 +156,7 @@ struct export_operations { int (*get_name)(struct dentry *parent, char *name, struct dentry *child); struct dentry * (*get_parent)(struct dentry *child); + int (*commit_metadata)(struct inode *inode); }; extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, -- cgit v0.10.2 From 978ebd97d1426d5708d3f353179ab81f191a7eeb Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Wed, 17 Feb 2010 14:05:16 -0600 Subject: xfs_export_operations.commit_metadata This is the commit_metadata export operation for XFS. - Takes one inode to be committed. - Forces the log up to the lsn of the inode. - Doesn't force the log if the inode doesn't have a pincount. Signed-off-by: Ben Myers Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner [bfields@citi.umich.edu: trivial whitespace fix] Signed-off-by: J. Bruce Fields diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 87b8cbd..8f4d707 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -29,6 +29,7 @@ #include "xfs_vnodeops.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" +#include "xfs_inode_item.h" /* * Note that we only accept fileids which are long enough rather than allow @@ -215,9 +216,28 @@ xfs_fs_get_parent( return d_obtain_alias(VFS_I(cip)); } +STATIC int +xfs_fs_nfs_commit_metadata( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + int error = 0; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_ipincount(ip)) { + error = _xfs_log_force(mp, ip->i_itemp->ili_last_lsn, + XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + return error; +} + const struct export_operations xfs_export_operations = { .encode_fh = xfs_fs_encode_fh, .fh_to_dentry = xfs_fs_fh_to_dentry, .fh_to_parent = xfs_fs_fh_to_parent, .get_parent = 
xfs_fs_get_parent, + .commit_metadata = xfs_fs_nfs_commit_metadata, }; -- cgit v0.10.2 From 58255a4e3ce506b43bb14d5579006731a981490d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 24 Feb 2010 13:48:06 -0800 Subject: NFSD: NFSv4 callback client should use RPC_TASK_SOFTCONN The server's callback client should stop trying to connect to the client's callback server as soon as it gets ECONNREFUSED. The NFS server's callback client does not call rpc_ping(), but appears to have its own "ping" procedure, so it wasn't covered by commit caabea8a. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c6eed2a..8fa412c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -542,7 +542,8 @@ void do_probe_callback(struct nfs4_client *clp) }; int status; - status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, + status = rpc_call_async(cb->cb_client, &msg, + RPC_TASK_SOFT | RPC_TASK_SOFTCONN, &nfsd4_cb_probe_ops, (void *)clp); if (status) { warn_no_callback_path(clp, status); -- cgit v0.10.2 From ab1b18f70a007ea6caeb007d269abb75b131a410 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 27 Feb 2010 09:33:40 +1100 Subject: sunrpc: remove unnecessary svc_xprt_put The 'struct svc_deferred_req's on the xpt_deferred queue do not own a reference to the owning xprt. This is seen in svc_revisit which is where things are added to this queue. dr->xprt is set to NULL and the reference to the xprt is put. So when this list is cleaned up in svc_delete_xprt, we mustn't put the reference. Also, replace the 'for' with a 'while' which is arguably simpler and more likely to compile efficiently. Cc: Tom Tucker Signed-off-by: NeilBrown Cc: stable@kernel.org Signed-off-by: J. 
Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d7ec5ca..0983830 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -896,11 +896,8 @@ void svc_delete_xprt(struct svc_xprt *xprt) if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; - for (dr = svc_deferred_dequeue(xprt); dr; - dr = svc_deferred_dequeue(xprt)) { - svc_xprt_put(xprt); + while ((dr = svc_deferred_dequeue(xprt)) != NULL) kfree(dr); - } svc_xprt_put(xprt); spin_unlock_bh(&serv->sv_lock); -- cgit v0.10.2 From f5822754ea006563e1bf0a1f43faaad49c0d8bb2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 28 Feb 2010 16:32:51 -0500 Subject: Revert "sunrpc: fix peername failed on closed listener" This reverts commit b292cf9ce70d221c3f04ff62db5ab13d9a249ca8. The commit that it attempted to patch up, b0401d725334a94d57335790b8ac2404144748ee, was fundamentally wrong, and will also be reverted. Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0983830..818c4c3 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -706,8 +706,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); len = 0; - if (test_bit(XPT_LISTENER, &xprt->xpt_flags) && - !test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { -- cgit v0.10.2 From 1b644b6e6f6160ae35ce4b52c2ca89ed3e356e18 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 28 Feb 2010 16:33:31 -0500 Subject: Revert "sunrpc: move the close processing after do recvfrom method" This reverts commit b0401d725334a94d57335790b8ac2404144748ee, which moved svc_delete_xprt() outside of XPT_BUSY, and allowed it to be called after svc_xprt_received(), removing its last reference and destroying it after it had already been queued for future processing. Signed-off-by: J. 
Bruce Fields diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 818c4c3..8f0f1fb 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -706,7 +706,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); len = 0; - if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + dprintk("svc_recv: found XPT_CLOSE\n"); + svc_delete_xprt(xprt); + } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { @@ -732,7 +735,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_received(newxpt); } svc_xprt_received(xprt); - } else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) { + } else { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); @@ -745,11 +748,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) dprintk("svc: got len=%d\n", len); } - if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { - dprintk("svc_recv: found XPT_CLOSE\n"); - svc_delete_xprt(xprt); - } - /* No data, incomplete (TCP) read, or accept() */ if (len == 0 || len == -EAGAIN) { rqstp->rq_res.len = 0; -- cgit v0.10.2 From 301e99ce4a2f42a317129230fd42e6cd874c64b0 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sun, 28 Feb 2010 22:01:05 -0500 Subject: nfsd: ensure sockets are closed on error One of the changes in commit d7979ae4a "svc: Move close processing to a single place" is: err_delete: - svc_delete_socket(svsk); + set_bit(SK_CLOSE, &svsk->sk_flags); return -EAGAIN; This is insufficient. The recvfrom methods must always call svc_xprt_received on completion so that the socket gets re-queued if there is any more work to do. This particular path did not make that call because it actually destroyed the svsk, making requeue pointless. 
When the svc_delete_socket was changed to just set a bit, we should have added a call to svc_xprt_received. This is the problem that b0401d7253 attempted to fix, incorrectly. Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9e09391..a29f259 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -968,6 +968,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return len; err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); + svc_xprt_received(&svsk->sk_xprt); err_again: return -EAGAIN; } -- cgit v0.10.2 From ccdb357ccb77cc4cbe4f7abee9efd19957f0753a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 2 Mar 2010 15:49:21 -0500 Subject: svcrpc: treat uid's as unsigned We should consistently treat uid's as unsigned--it's confusing when the display of uid's in the cache contents isn't consistent with their representation in upcalls. Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 97f0e9e..afdcb04 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -624,7 +624,7 @@ static int unix_gid_show(struct seq_file *m, else glen = 0; - seq_printf(m, "%d %d:", ug->uid, glen); + seq_printf(m, "%u %d:", ug->uid, glen); for (i = 0; i < glen; i++) seq_printf(m, " %d", GROUP_AT(ug->gi, i)); seq_printf(m, "\n"); -- cgit v0.10.2 From 8d75da8afd068fa58b35e69c7c8c46770d9e7a98 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 3 Mar 2010 16:13:29 -0500 Subject: nfsd4: fix minor memory leak There's no need to allocate this cred more than once. Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 8fa412c..4bc22c7 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -525,6 +525,8 @@ static struct rpc_cred *callback_cred; int set_callback_cred(void) { + if (callback_cred) + return 0; callback_cred = rpc_lookup_machine_cred(); if (!callback_cred) return -ENOMEM; -- cgit v0.10.2